diff --git a/custom_components/fn_nas/coordinator.py b/custom_components/fn_nas/coordinator.py index 1547256..295d349 100644 --- a/custom_components/fn_nas/coordinator.py +++ b/custom_components/fn_nas/coordinator.py @@ -313,11 +313,34 @@ class FlynasCoordinator(DataUpdateCoordinator): if connection_id is not None: await self.release_ssh_connection(connection_id) + async def ping_system(self) -> bool: + """轻量级系统状态检测""" + # 对于本地主机直接返回True + if self.host in ['localhost', '127.0.0.1']: + return True + + try: + # 使用异步ping检测,减少超时时间 + proc = await asyncio.create_subprocess_exec( + 'ping', '-c', '1', '-W', '1', self.host, + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.DEVNULL + ) + await asyncio.wait_for(proc.wait(), timeout=2) # 总超时时间2秒 + return proc.returncode == 0 + except Exception: + return False + async def _monitor_system_status(self): """系统离线时轮询检测状态""" self._debug_log(f"启动系统状态监控,每{self._retry_interval}秒检测一次") + + # 使用指数退避策略,避免频繁检测 + check_interval = self._retry_interval + max_interval = 300 # 最大5分钟检测一次 + while True: - await asyncio.sleep(self._retry_interval) + await asyncio.sleep(check_interval) if await self.ping_system(): self._info_log("检测到系统已开机,触发重新加载") @@ -326,24 +349,10 @@ class FlynasCoordinator(DataUpdateCoordinator): self.hass.config_entries.async_reload(self.config_entry.entry_id) ) break - - async def ping_system(self) -> bool: - """轻量级系统状态检测""" - # 对于本地主机直接返回True - if self.host in ['localhost', '127.0.0.1']: - return True - - try: - # 使用异步ping检测 - proc = await asyncio.create_subprocess_exec( - 'ping', '-c', '1', '-W', '1', self.host, - stdout=asyncio.subprocess.DEVNULL, - stderr=asyncio.subprocess.DEVNULL - ) - await proc.wait() - return proc.returncode == 0 - except Exception: - return False + else: + # 系统仍然离线,增加检测间隔(指数退避) + check_interval = min(check_interval * 1.5, max_interval) + self._debug_log(f"系统仍离线,下次检测间隔: {check_interval}秒") async def _async_update_data(self): """数据更新入口,优化命令执行频率""" diff --git a/custom_components/fn_nas/disk_manager.py b/custom_components/fn_nas/disk_manager.py index 2afb98c..a8f447f 100644 --- a/custom_components/fn_nas/disk_manager.py +++ b/custom_components/fn_nas/disk_manager.py @@ -14,6 +14,7 @@ class DiskManager: self.disk_full_info_cache = {} # 缓存磁盘完整信息 self.first_run = True # 首次运行标志 self.initial_detection_done = False # 首次完整检测完成标志 + self.disk_io_stats_cache = {} # 缓存磁盘I/O统计信息 def extract_value(self, text: str, patterns, default="未知", format_func=None): if not text: @@ -38,10 +39,9 @@ class DiskManager: async def check_disk_active(self, device: str, window: int = 30) -> bool: """检查硬盘在指定时间窗口内是否有活动""" try: - # 正确的路径是 /sys/block/{device}/stat stat_path = f"/sys/block/{device}/stat" - # 读取统计文件 + # 读取当前统计文件 stat_output = await self.coordinator.run_command(f"cat {stat_path} 2>/dev/null") if not stat_output: self.logger.debug(f"无法读取 {stat_path},默认返回活跃状态") @@ -52,52 +52,148 @@ class DiskManager: if len(stats) < 11: self.logger.debug(f"无效的统计信息格式:{stat_output}") return True - - # 关键字段:当前正在进行的I/O操作数量(第9个字段,索引8) - in_flight = int(stats[8]) - # 如果当前有I/O操作,直接返回活跃状态 - if in_flight > 0: + try: + # /sys/block/{device}/stat 字段说明: + # 0: read I/Os requests 读请求次数 + # 1: read I/Os merged 读请求合并次数 + # 2: read sectors 读扇区数 + # 3: read ticks 读操作耗时(ms) + # 4: write I/Os requests 写请求次数 + # 5: write I/Os merged 写请求合并次数 + # 6: write sectors 写扇区数 + # 7: write ticks 写操作耗时(ms) + # 8: in_flight 当前进行中的I/O请求数 + # 9: io_ticks I/O活动时间(ms) + # 10: time_in_queue 队列中的总时间(ms) + + current_stats = { + 'read_ios': int(stats[0]), + 'write_ios': int(stats[4]), + 'in_flight': int(stats[8]), + 'io_ticks': int(stats[9]) + } + + # 如果当前有正在进行的I/O操作,直接返回活跃状态 + if current_stats['in_flight'] > 0: + self.logger.debug(f"磁盘 {device} 有正在进行的I/O操作: {current_stats['in_flight']}") + self.disk_io_stats_cache[device] = current_stats + return True + + # 检查是否有缓存的统计信息 + cached_stats = self.disk_io_stats_cache.get(device) + + if cached_stats: + # 比较I/O请求次数的变化 + read_ios_diff = current_stats['read_ios'] - cached_stats['read_ios'] + write_ios_diff = current_stats['write_ios'] - cached_stats['write_ios'] + io_ticks_diff = current_stats['io_ticks'] - cached_stats['io_ticks'] + + self.logger.debug(f"磁盘 {device} I/O变化: 读={read_ios_diff}, 写={write_ios_diff}, 活动时间={io_ticks_diff}ms") + + # 如果在检测窗口内有I/O活动,认为磁盘活跃 + if read_ios_diff > 0 or write_ios_diff > 0 or io_ticks_diff > 100: # 100ms内的活动 + self.logger.debug(f"磁盘 {device} 在窗口期内有I/O活动") + self.disk_io_stats_cache[device] = current_stats + return True + + # 检查io_ticks是否表明最近有活动 + # io_ticks是累积值,如果在合理范围内增长,说明有轻微活动 + if io_ticks_diff > 0 and io_ticks_diff < window * 1000: # 在窗口时间内的轻微活动 + self.logger.debug(f"磁盘 {device} 有轻微I/O活动") + self.disk_io_stats_cache[device] = current_stats + return True + else: + # 首次检测,保存当前状态并认为活跃 + self.logger.debug(f"磁盘 {device} 首次检测,保存统计信息") + self.disk_io_stats_cache[device] = current_stats + return True + + # 更新缓存 + self.disk_io_stats_cache[device] = current_stats + + # 检查硬盘电源状态 + power_state = await self.get_disk_power_state(device) + if power_state in ["standby", "sleep", "idle"]: + self.logger.debug(f"磁盘 {device} 处于省电状态: {power_state}") + return False + + # 所有检查都通过,返回非活跃状态 + self.logger.debug(f"磁盘 {device} 判定为非活跃状态") + return False + + except (ValueError, IndexError) as e: + self.logger.debug(f"解析统计信息失败: {e}") return True - # 检查I/O操作时间(第10个字段,索引9) - io_ticks(单位毫秒) - io_ticks = int(stats[9]) - - # 如果设备在窗口时间内有I/O活动,返回活跃状态 - if io_ticks > window * 1000: - return True - - # 所有检查都通过,返回非活跃状态 - return False - except Exception as e: - self.logger.error(f"检测硬盘活动状态失败: {str(e)}", exc_info=True) + self.logger.error(f"检测硬盘活动状态失败: {str(e)}") return True # 出错时默认执行检测 - async def get_disk_activity(self, device: str) -> str: - """获取硬盘活动状态(活动中/空闲中/休眠中)""" + async def get_disk_power_state(self, device: str) -> str: + """获取硬盘电源状态""" try: - # 检查硬盘是否处于休眠状态 + # 检查 SCSI 设备状态 state_path = f"/sys/block/{device}/device/state" state_output = await self.coordinator.run_command(f"cat {state_path} 2>/dev/null || echo 'unknown'") state = state_output.strip().lower() - if state in ["standby", "sleep"]: + if state in ["running", "active"]: + return "active" + elif state in ["standby", "sleep"]: + return state + + # 对于某些设备,尝试通过hdparm检查状态(非侵入性) + hdparm_output = await self.coordinator.run_command(f"hdparm -C /dev/{device} 2>/dev/null || echo 'unknown'") + if "standby" in hdparm_output.lower(): + return "standby" + elif "sleeping" in hdparm_output.lower(): + return "sleep" + elif "active/idle" in hdparm_output.lower(): + return "active" + + return "unknown" + + except Exception as e: + self.logger.debug(f"获取磁盘 {device} 电源状态失败: {e}") + return "unknown" + + async def get_disk_activity(self, device: str) -> str: + """获取硬盘活动状态(活动中/空闲中/休眠中)""" + try: + # 先检查电源状态 + power_state = await self.get_disk_power_state(device) + if power_state in ["standby", "sleep"]: return "休眠中" - # 检查最近一分钟内的硬盘活动 + # 检查最近的I/O活动 stat_path = f"/sys/block/{device}/stat" - stat_output = await self.coordinator.run_command(f"cat {stat_path}") - stats = stat_output.split() + stat_output = await self.coordinator.run_command(f"cat {stat_path} 2>/dev/null") - if len(stats) >= 11: - # 第9个字段是最近完成的读操作数 - # 第10个字段是最近完成的写操作数 - recent_reads = int(stats[8]) - recent_writes = int(stats[9]) - - if recent_reads > 0 or recent_writes > 0: - return "活动中" + if stat_output: + stats = stat_output.split() + if len(stats) >= 11: + try: + in_flight = int(stats[8]) # 当前进行中的I/O + + # 如果有正在进行的I/O,返回活动中 + if in_flight > 0: + return "活动中" + + # 检查缓存的统计信息来判断近期活动 + cached_stats = self.disk_io_stats_cache.get(device) + if cached_stats: + current_read_ios = int(stats[0]) + current_write_ios = int(stats[4]) + + read_diff = current_read_ios - cached_stats.get('read_ios', 0) + write_diff = current_write_ios - cached_stats.get('write_ios', 0) + + if read_diff > 0 or write_diff > 0: + return "活动中" + + except (ValueError, IndexError): + pass return "空闲中" diff --git a/custom_components/fn_nas/system_manager.py b/custom_components/fn_nas/system_manager.py index 942a4c5..ff4cd9b 100644 --- a/custom_components/fn_nas/system_manager.py +++ b/custom_components/fn_nas/system_manager.py @@ -293,90 +293,85 @@ class SystemManager: return {} async def get_vol_usage(self) -> dict: - """获取 /vol* 开头的存储卷使用信息""" + """获取 /vol* 开头的存储卷使用信息,避免唤醒休眠磁盘""" try: - # 优先使用字节单位 - df_output = await self.coordinator.run_command("df -B 1 /vol* 2>/dev/null") - if df_output: - return self.parse_df_bytes(df_output) + # 首先检查哪些卷是活跃的,避免访问休眠磁盘 + active_vols = await self.check_active_volumes() - df_output = await self.coordinator.run_command("df -h /vol*") - if df_output: - return self.parse_df_human_readable(df_output) + if active_vols: + # 只查询活跃的卷,避免使用通配符可能唤醒所有磁盘 + vol_list = " ".join(active_vols) + df_output = await self.coordinator.run_command(f"df -B 1 {vol_list} 2>/dev/null") + if df_output: + return self.parse_df_bytes(df_output) + df_output = await self.coordinator.run_command(f"df -h {vol_list} 2>/dev/null") + if df_output: + return self.parse_df_human_readable(df_output) + + # 如果没有活跃卷或者上述方法失败,使用缓存或者返回空 return {} + except Exception as e: self.logger.error("获取存储卷信息失败: %s", str(e)) return {} - def parse_df_bytes(self, df_output: str) -> dict: - volumes = {} - for line in df_output.splitlines()[1:]: - parts = line.split() - if len(parts) < 6: - continue - - mount_point = parts[-1] - # 只处理 /vol 开头的挂载点 - if not mount_point.startswith("/vol"): - continue - - try: - size_bytes = int(parts[1]) - used_bytes = int(parts[2]) - avail_bytes = int(parts[3]) - use_percent = parts[4] - - def bytes_to_human(b): - for unit in ['', 'K', 'M', 'G', 'T']: - if abs(b) < 1024.0: - return f"{b:.1f}{unit}" - b /= 1024.0 - return f"{b:.1f}P" - - volumes[mount_point] = { - "filesystem": parts[0], - "size": bytes_to_human(size_bytes), - "used": bytes_to_human(used_bytes), - "available": bytes_to_human(avail_bytes), - "use_percent": use_percent - } - except (ValueError, IndexError) as e: - self.logger.debug("解析存储卷行失败: %s - %s", line, str(e)) - continue - - return volumes + async def check_active_volumes(self) -> list: + """检查当前活跃的存储卷,避免唤醒休眠磁盘""" + try: + # 获取所有挂载点,这个操作不会访问磁盘内容 + mount_output = await self.coordinator.run_command("mount | grep '/vol'") + active_vols = [] + + for line in mount_output.splitlines(): + if '/vol' in line: + # 提取挂载点 + parts = line.split() + for part in parts: + if part.startswith('/vol'): + # 检查这个卷对应的磁盘是否活跃 + if await self.is_volume_disk_active(part): + active_vols.append(part) + break + + self._debug_log(f"检测到活跃存储卷: {active_vols}") + return active_vols + + except Exception as e: + self._debug_log(f"检查活跃存储卷失败: {e}") + return [] - def parse_df_human_readable(self, df_output: str) -> dict: - volumes = {} - for line in df_output.splitlines()[1:]: - parts = line.split() - if len(parts) < 6: - continue + async def is_volume_disk_active(self, mount_point: str) -> bool: + """检查存储卷对应的磁盘是否活跃""" + try: + # 获取挂载点对应的设备 + device_output = await self.coordinator.run_command(f"findmnt -n -o SOURCE {mount_point} 2>/dev/null") + if not device_output: + return False + + device = device_output.strip() + # 提取设备名(去掉分区号) + import re + device_match = re.search(r'/dev/([a-zA-Z]+)', device) + if device_match: + device_name = device_match.group(1) - mount_point = parts[-1] - if not mount_point.startswith("/vol"): - continue + # 检查设备的I/O统计,不直接访问磁盘 + stat_path = f"/sys/block/{device_name}/stat" + stat_output = await self.coordinator.run_command(f"cat {stat_path} 2>/dev/null") - try: - size = parts[1] - used = parts[2] - avail = parts[3] - use_percent = parts[4] - - volumes[mount_point] = { - "filesystem": parts[0], - "size": size, - "used": used, - "available": avail, - "use_percent": use_percent - } - except (ValueError, IndexError) as e: - self.logger.debug("解析存储卷行失败: %s - %s", line, str(e)) - continue - - return volumes - + if stat_output: + stats = stat_output.split() + if len(stats) >= 9: + in_flight = int(stats[8]) # 当前进行中的I/O + return in_flight > 0 # 有I/O活动认为是活跃的 + + return False + + except Exception as e: + self._debug_log(f"检查卷磁盘活跃状态失败 {mount_point}: {e}") + return False + async def reboot_system(self): """重启系统""" self._info_log("Initiating system reboot...")