mirror of
https://github.com/anxms/fn_nas.git
synced 2025-10-15 17:48:29 +00:00
优化硬盘检测逻辑避免唤醒休眠硬盘
This commit is contained in:
@@ -313,11 +313,34 @@ class FlynasCoordinator(DataUpdateCoordinator):
|
|||||||
if connection_id is not None:
|
if connection_id is not None:
|
||||||
await self.release_ssh_connection(connection_id)
|
await self.release_ssh_connection(connection_id)
|
||||||
|
|
||||||
|
async def ping_system(self) -> bool:
    """Lightweight reachability check for the monitored host.

    Sends a single ping to ``self.host`` and reports whether it succeeded.

    Returns:
        True when ``self.host`` is 'localhost' / '127.0.0.1' or the ping
        process exits with status 0; False when the ping fails, does not
        finish within 2 seconds, or cannot be started at all.
    """
    # The local host is treated as always online; no subprocess is spawned.
    if self.host in ('localhost', '127.0.0.1'):
        return True

    proc = None
    try:
        # Asynchronous ping: one request (-c 1) with a short reply
        # timeout (-W 1), output discarded.
        proc = await asyncio.create_subprocess_exec(
            'ping', '-c', '1', '-W', '1', self.host,
            stdout=asyncio.subprocess.DEVNULL,
            stderr=asyncio.subprocess.DEVNULL,
        )
        # Overall cap of 2 seconds for the whole probe.
        await asyncio.wait_for(proc.wait(), timeout=2)
        return proc.returncode == 0
    except Exception:
        # Best-effort probe: any failure (timeout, missing binary,
        # invalid host value) is reported as "offline".
        return False
    finally:
        # wait_for() only cancels the wait; the child keeps running.
        # Kill and reap it so timed-out probes do not leak processes.
        if proc is not None and proc.returncode is None:
            try:
                proc.kill()
                await proc.wait()
            except ProcessLookupError:
                # The process exited between the check and the kill.
                pass
|
||||||
|
|
||||||
async def _monitor_system_status(self):
|
async def _monitor_system_status(self):
|
||||||
"""系统离线时轮询检测状态"""
|
"""系统离线时轮询检测状态"""
|
||||||
self._debug_log(f"启动系统状态监控,每{self._retry_interval}秒检测一次")
|
self._debug_log(f"启动系统状态监控,每{self._retry_interval}秒检测一次")
|
||||||
|
|
||||||
|
# 使用指数退避策略,避免频繁检测
|
||||||
|
check_interval = self._retry_interval
|
||||||
|
max_interval = 300 # 最大5分钟检测一次
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
await asyncio.sleep(self._retry_interval)
|
await asyncio.sleep(check_interval)
|
||||||
|
|
||||||
if await self.ping_system():
|
if await self.ping_system():
|
||||||
self._info_log("检测到系统已开机,触发重新加载")
|
self._info_log("检测到系统已开机,触发重新加载")
|
||||||
@@ -326,24 +349,10 @@ class FlynasCoordinator(DataUpdateCoordinator):
|
|||||||
self.hass.config_entries.async_reload(self.config_entry.entry_id)
|
self.hass.config_entries.async_reload(self.config_entry.entry_id)
|
||||||
)
|
)
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
async def ping_system(self) -> bool:
|
# 系统仍然离线,增加检测间隔(指数退避)
|
||||||
"""轻量级系统状态检测"""
|
check_interval = min(check_interval * 1.5, max_interval)
|
||||||
# 对于本地主机直接返回True
|
self._debug_log(f"系统仍离线,下次检测间隔: {check_interval}秒")
|
||||||
if self.host in ['localhost', '127.0.0.1']:
|
|
||||||
return True
|
|
||||||
|
|
||||||
try:
|
|
||||||
# 使用异步ping检测
|
|
||||||
proc = await asyncio.create_subprocess_exec(
|
|
||||||
'ping', '-c', '1', '-W', '1', self.host,
|
|
||||||
stdout=asyncio.subprocess.DEVNULL,
|
|
||||||
stderr=asyncio.subprocess.DEVNULL
|
|
||||||
)
|
|
||||||
await proc.wait()
|
|
||||||
return proc.returncode == 0
|
|
||||||
except Exception:
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def _async_update_data(self):
|
async def _async_update_data(self):
|
||||||
"""数据更新入口,优化命令执行频率"""
|
"""数据更新入口,优化命令执行频率"""
|
||||||
|
@@ -14,6 +14,7 @@ class DiskManager:
|
|||||||
self.disk_full_info_cache = {} # 缓存磁盘完整信息
|
self.disk_full_info_cache = {} # 缓存磁盘完整信息
|
||||||
self.first_run = True # 首次运行标志
|
self.first_run = True # 首次运行标志
|
||||||
self.initial_detection_done = False # 首次完整检测完成标志
|
self.initial_detection_done = False # 首次完整检测完成标志
|
||||||
|
self.disk_io_stats_cache = {} # 缓存磁盘I/O统计信息
|
||||||
|
|
||||||
def extract_value(self, text: str, patterns, default="未知", format_func=None):
|
def extract_value(self, text: str, patterns, default="未知", format_func=None):
|
||||||
if not text:
|
if not text:
|
||||||
@@ -38,10 +39,9 @@ class DiskManager:
|
|||||||
async def check_disk_active(self, device: str, window: int = 30) -> bool:
|
async def check_disk_active(self, device: str, window: int = 30) -> bool:
|
||||||
"""检查硬盘在指定时间窗口内是否有活动"""
|
"""检查硬盘在指定时间窗口内是否有活动"""
|
||||||
try:
|
try:
|
||||||
# 正确的路径是 /sys/block/{device}/stat
|
|
||||||
stat_path = f"/sys/block/{device}/stat"
|
stat_path = f"/sys/block/{device}/stat"
|
||||||
|
|
||||||
# 读取统计文件
|
# 读取当前统计文件
|
||||||
stat_output = await self.coordinator.run_command(f"cat {stat_path} 2>/dev/null")
|
stat_output = await self.coordinator.run_command(f"cat {stat_path} 2>/dev/null")
|
||||||
if not stat_output:
|
if not stat_output:
|
||||||
self.logger.debug(f"无法读取 {stat_path},默认返回活跃状态")
|
self.logger.debug(f"无法读取 {stat_path},默认返回活跃状态")
|
||||||
@@ -53,52 +53,148 @@ class DiskManager:
|
|||||||
self.logger.debug(f"无效的统计信息格式:{stat_output}")
|
self.logger.debug(f"无效的统计信息格式:{stat_output}")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# 关键字段:当前正在进行的I/O操作数量(第9个字段,索引8)
|
try:
|
||||||
in_flight = int(stats[8])
|
# /sys/block/{device}/stat 字段说明:
|
||||||
|
# 0: read I/Os requests 读请求次数
|
||||||
|
# 1: read I/Os merged 读请求合并次数
|
||||||
|
# 2: read sectors 读扇区数
|
||||||
|
# 3: read ticks 读操作耗时(ms)
|
||||||
|
# 4: write I/Os requests 写请求次数
|
||||||
|
# 5: write I/Os merged 写请求合并次数
|
||||||
|
# 6: write sectors 写扇区数
|
||||||
|
# 7: write ticks 写操作耗时(ms)
|
||||||
|
# 8: in_flight 当前进行中的I/O请求数
|
||||||
|
# 9: io_ticks I/O活动时间(ms)
|
||||||
|
# 10: time_in_queue 队列中的总时间(ms)
|
||||||
|
|
||||||
# 如果当前有I/O操作,直接返回活跃状态
|
current_stats = {
|
||||||
if in_flight > 0:
|
'read_ios': int(stats[0]),
|
||||||
|
'write_ios': int(stats[4]),
|
||||||
|
'in_flight': int(stats[8]),
|
||||||
|
'io_ticks': int(stats[9])
|
||||||
|
}
|
||||||
|
|
||||||
|
# 如果当前有正在进行的I/O操作,直接返回活跃状态
|
||||||
|
if current_stats['in_flight'] > 0:
|
||||||
|
self.logger.debug(f"磁盘 {device} 有正在进行的I/O操作: {current_stats['in_flight']}")
|
||||||
|
self.disk_io_stats_cache[device] = current_stats
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# 检查I/O操作时间(第10个字段,索引9) - io_ticks(单位毫秒)
|
# 检查是否有缓存的统计信息
|
||||||
io_ticks = int(stats[9])
|
cached_stats = self.disk_io_stats_cache.get(device)
|
||||||
|
|
||||||
# 如果设备在窗口时间内有I/O活动,返回活跃状态
|
if cached_stats:
|
||||||
if io_ticks > window * 1000:
|
# 比较I/O请求次数的变化
|
||||||
|
read_ios_diff = current_stats['read_ios'] - cached_stats['read_ios']
|
||||||
|
write_ios_diff = current_stats['write_ios'] - cached_stats['write_ios']
|
||||||
|
io_ticks_diff = current_stats['io_ticks'] - cached_stats['io_ticks']
|
||||||
|
|
||||||
|
self.logger.debug(f"磁盘 {device} I/O变化: 读={read_ios_diff}, 写={write_ios_diff}, 活动时间={io_ticks_diff}ms")
|
||||||
|
|
||||||
|
# 如果在检测窗口内有I/O活动,认为磁盘活跃
|
||||||
|
if read_ios_diff > 0 or write_ios_diff > 0 or io_ticks_diff > 100: # 100ms内的活动
|
||||||
|
self.logger.debug(f"磁盘 {device} 在窗口期内有I/O活动")
|
||||||
|
self.disk_io_stats_cache[device] = current_stats
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# 所有检查都通过,返回非活跃状态
|
# 检查io_ticks是否表明最近有活动
|
||||||
|
# io_ticks是累积值,如果在合理范围内增长,说明有轻微活动
|
||||||
|
if io_ticks_diff > 0 and io_ticks_diff < window * 1000: # 在窗口时间内的轻微活动
|
||||||
|
self.logger.debug(f"磁盘 {device} 有轻微I/O活动")
|
||||||
|
self.disk_io_stats_cache[device] = current_stats
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
# 首次检测,保存当前状态并认为活跃
|
||||||
|
self.logger.debug(f"磁盘 {device} 首次检测,保存统计信息")
|
||||||
|
self.disk_io_stats_cache[device] = current_stats
|
||||||
|
return True
|
||||||
|
|
||||||
|
# 更新缓存
|
||||||
|
self.disk_io_stats_cache[device] = current_stats
|
||||||
|
|
||||||
|
# 检查硬盘电源状态
|
||||||
|
power_state = await self.get_disk_power_state(device)
|
||||||
|
if power_state in ["standby", "sleep", "idle"]:
|
||||||
|
self.logger.debug(f"磁盘 {device} 处于省电状态: {power_state}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# 所有检查都通过,返回非活跃状态
|
||||||
|
self.logger.debug(f"磁盘 {device} 判定为非活跃状态")
|
||||||
|
return False
|
||||||
|
|
||||||
|
except (ValueError, IndexError) as e:
|
||||||
|
self.logger.debug(f"解析统计信息失败: {e}")
|
||||||
|
return True
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"检测硬盘活动状态失败: {str(e)}", exc_info=True)
|
self.logger.error(f"检测硬盘活动状态失败: {str(e)}")
|
||||||
return True # 出错时默认执行检测
|
return True # 出错时默认执行检测
|
||||||
|
|
||||||
async def get_disk_activity(self, device: str) -> str:
|
async def get_disk_power_state(self, device: str) -> str:
|
||||||
"""获取硬盘活动状态(活动中/空闲中/休眠中)"""
|
"""获取硬盘电源状态"""
|
||||||
try:
|
try:
|
||||||
# 检查硬盘是否处于休眠状态
|
# 检查 SCSI 设备状态
|
||||||
state_path = f"/sys/block/{device}/device/state"
|
state_path = f"/sys/block/{device}/device/state"
|
||||||
state_output = await self.coordinator.run_command(f"cat {state_path} 2>/dev/null || echo 'unknown'")
|
state_output = await self.coordinator.run_command(f"cat {state_path} 2>/dev/null || echo 'unknown'")
|
||||||
state = state_output.strip().lower()
|
state = state_output.strip().lower()
|
||||||
|
|
||||||
if state in ["standby", "sleep"]:
|
if state in ["running", "active"]:
|
||||||
|
return "active"
|
||||||
|
elif state in ["standby", "sleep"]:
|
||||||
|
return state
|
||||||
|
|
||||||
|
# 对于某些设备,尝试通过hdparm检查状态(非侵入性)
|
||||||
|
hdparm_output = await self.coordinator.run_command(f"hdparm -C /dev/{device} 2>/dev/null || echo 'unknown'")
|
||||||
|
if "standby" in hdparm_output.lower():
|
||||||
|
return "standby"
|
||||||
|
elif "sleeping" in hdparm_output.lower():
|
||||||
|
return "sleep"
|
||||||
|
elif "active/idle" in hdparm_output.lower():
|
||||||
|
return "active"
|
||||||
|
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.debug(f"获取磁盘 {device} 电源状态失败: {e}")
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
async def get_disk_activity(self, device: str) -> str:
|
||||||
|
"""获取硬盘活动状态(活动中/空闲中/休眠中)"""
|
||||||
|
try:
|
||||||
|
# 先检查电源状态
|
||||||
|
power_state = await self.get_disk_power_state(device)
|
||||||
|
if power_state in ["standby", "sleep"]:
|
||||||
return "休眠中"
|
return "休眠中"
|
||||||
|
|
||||||
# 检查最近一分钟内的硬盘活动
|
# 检查最近的I/O活动
|
||||||
stat_path = f"/sys/block/{device}/stat"
|
stat_path = f"/sys/block/{device}/stat"
|
||||||
stat_output = await self.coordinator.run_command(f"cat {stat_path}")
|
stat_output = await self.coordinator.run_command(f"cat {stat_path} 2>/dev/null")
|
||||||
|
|
||||||
|
if stat_output:
|
||||||
stats = stat_output.split()
|
stats = stat_output.split()
|
||||||
|
|
||||||
if len(stats) >= 11:
|
if len(stats) >= 11:
|
||||||
# 第9个字段是最近完成的读操作数
|
try:
|
||||||
# 第10个字段是最近完成的写操作数
|
in_flight = int(stats[8]) # 当前进行中的I/O
|
||||||
recent_reads = int(stats[8])
|
|
||||||
recent_writes = int(stats[9])
|
|
||||||
|
|
||||||
if recent_reads > 0 or recent_writes > 0:
|
# 如果有正在进行的I/O,返回活动中
|
||||||
|
if in_flight > 0:
|
||||||
return "活动中"
|
return "活动中"
|
||||||
|
|
||||||
|
# 检查缓存的统计信息来判断近期活动
|
||||||
|
cached_stats = self.disk_io_stats_cache.get(device)
|
||||||
|
if cached_stats:
|
||||||
|
current_read_ios = int(stats[0])
|
||||||
|
current_write_ios = int(stats[4])
|
||||||
|
|
||||||
|
read_diff = current_read_ios - cached_stats.get('read_ios', 0)
|
||||||
|
write_diff = current_write_ios - cached_stats.get('write_ios', 0)
|
||||||
|
|
||||||
|
if read_diff > 0 or write_diff > 0:
|
||||||
|
return "活动中"
|
||||||
|
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
pass
|
||||||
|
|
||||||
return "空闲中"
|
return "空闲中"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@@ -293,89 +293,84 @@ class SystemManager:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
async def get_vol_usage(self) -> dict:
|
async def get_vol_usage(self) -> dict:
|
||||||
"""获取 /vol* 开头的存储卷使用信息"""
|
"""获取 /vol* 开头的存储卷使用信息,避免唤醒休眠磁盘"""
|
||||||
try:
|
try:
|
||||||
# 优先使用字节单位
|
# 首先检查哪些卷是活跃的,避免访问休眠磁盘
|
||||||
df_output = await self.coordinator.run_command("df -B 1 /vol* 2>/dev/null")
|
active_vols = await self.check_active_volumes()
|
||||||
|
|
||||||
|
if active_vols:
|
||||||
|
# 只查询活跃的卷,避免使用通配符可能唤醒所有磁盘
|
||||||
|
vol_list = " ".join(active_vols)
|
||||||
|
df_output = await self.coordinator.run_command(f"df -B 1 {vol_list} 2>/dev/null")
|
||||||
if df_output:
|
if df_output:
|
||||||
return self.parse_df_bytes(df_output)
|
return self.parse_df_bytes(df_output)
|
||||||
|
|
||||||
df_output = await self.coordinator.run_command("df -h /vol*")
|
df_output = await self.coordinator.run_command(f"df -h {vol_list} 2>/dev/null")
|
||||||
if df_output:
|
if df_output:
|
||||||
return self.parse_df_human_readable(df_output)
|
return self.parse_df_human_readable(df_output)
|
||||||
|
|
||||||
|
# 如果没有活跃卷或者上述方法失败,使用缓存或者返回空
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error("获取存储卷信息失败: %s", str(e))
|
self.logger.error("获取存储卷信息失败: %s", str(e))
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
def parse_df_bytes(self, df_output: str) -> dict:
|
async def check_active_volumes(self) -> list:
|
||||||
volumes = {}
|
"""检查当前活跃的存储卷,避免唤醒休眠磁盘"""
|
||||||
for line in df_output.splitlines()[1:]:
|
|
||||||
parts = line.split()
|
|
||||||
if len(parts) < 6:
|
|
||||||
continue
|
|
||||||
|
|
||||||
mount_point = parts[-1]
|
|
||||||
# 只处理 /vol 开头的挂载点
|
|
||||||
if not mount_point.startswith("/vol"):
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
size_bytes = int(parts[1])
|
# 获取所有挂载点,这个操作不会访问磁盘内容
|
||||||
used_bytes = int(parts[2])
|
mount_output = await self.coordinator.run_command("mount | grep '/vol'")
|
||||||
avail_bytes = int(parts[3])
|
active_vols = []
|
||||||
use_percent = parts[4]
|
|
||||||
|
|
||||||
def bytes_to_human(b):
|
for line in mount_output.splitlines():
|
||||||
for unit in ['', 'K', 'M', 'G', 'T']:
|
if '/vol' in line:
|
||||||
if abs(b) < 1024.0:
|
# 提取挂载点
|
||||||
return f"{b:.1f}{unit}"
|
|
||||||
b /= 1024.0
|
|
||||||
return f"{b:.1f}P"
|
|
||||||
|
|
||||||
volumes[mount_point] = {
|
|
||||||
"filesystem": parts[0],
|
|
||||||
"size": bytes_to_human(size_bytes),
|
|
||||||
"used": bytes_to_human(used_bytes),
|
|
||||||
"available": bytes_to_human(avail_bytes),
|
|
||||||
"use_percent": use_percent
|
|
||||||
}
|
|
||||||
except (ValueError, IndexError) as e:
|
|
||||||
self.logger.debug("解析存储卷行失败: %s - %s", line, str(e))
|
|
||||||
continue
|
|
||||||
|
|
||||||
return volumes
|
|
||||||
|
|
||||||
def parse_df_human_readable(self, df_output: str) -> dict:
|
|
||||||
volumes = {}
|
|
||||||
for line in df_output.splitlines()[1:]:
|
|
||||||
parts = line.split()
|
parts = line.split()
|
||||||
if len(parts) < 6:
|
for part in parts:
|
||||||
continue
|
if part.startswith('/vol'):
|
||||||
|
# 检查这个卷对应的磁盘是否活跃
|
||||||
|
if await self.is_volume_disk_active(part):
|
||||||
|
active_vols.append(part)
|
||||||
|
break
|
||||||
|
|
||||||
mount_point = parts[-1]
|
self._debug_log(f"检测到活跃存储卷: {active_vols}")
|
||||||
if not mount_point.startswith("/vol"):
|
return active_vols
|
||||||
continue
|
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self._debug_log(f"检查活跃存储卷失败: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
async def is_volume_disk_active(self, mount_point: str) -> bool:
|
||||||
|
"""检查存储卷对应的磁盘是否活跃"""
|
||||||
try:
|
try:
|
||||||
size = parts[1]
|
# 获取挂载点对应的设备
|
||||||
used = parts[2]
|
device_output = await self.coordinator.run_command(f"findmnt -n -o SOURCE {mount_point} 2>/dev/null")
|
||||||
avail = parts[3]
|
if not device_output:
|
||||||
use_percent = parts[4]
|
return False
|
||||||
|
|
||||||
volumes[mount_point] = {
|
device = device_output.strip()
|
||||||
"filesystem": parts[0],
|
# 提取设备名(去掉分区号)
|
||||||
"size": size,
|
import re
|
||||||
"used": used,
|
device_match = re.search(r'/dev/([a-zA-Z]+)', device)
|
||||||
"available": avail,
|
if device_match:
|
||||||
"use_percent": use_percent
|
device_name = device_match.group(1)
|
||||||
}
|
|
||||||
except (ValueError, IndexError) as e:
|
|
||||||
self.logger.debug("解析存储卷行失败: %s - %s", line, str(e))
|
|
||||||
continue
|
|
||||||
|
|
||||||
return volumes
|
# 检查设备的I/O统计,不直接访问磁盘
|
||||||
|
stat_path = f"/sys/block/{device_name}/stat"
|
||||||
|
stat_output = await self.coordinator.run_command(f"cat {stat_path} 2>/dev/null")
|
||||||
|
|
||||||
|
if stat_output:
|
||||||
|
stats = stat_output.split()
|
||||||
|
if len(stats) >= 9:
|
||||||
|
in_flight = int(stats[8]) # 当前进行中的I/O
|
||||||
|
return in_flight > 0 # 有I/O活动认为是活跃的
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self._debug_log(f"检查卷磁盘活跃状态失败 {mount_point}: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
async def reboot_system(self):
|
async def reboot_system(self):
|
||||||
"""重启系统"""
|
"""重启系统"""
|
||||||
|
Reference in New Issue
Block a user