From d3e7fee8285de53e3ee8247816b85aa231413132 Mon Sep 17 00:00:00 2001 From: Xiaoxia Date: Mon, 22 Sep 2025 10:49:08 +0800 Subject: [PATCH] fix multiple wakenet words and custom wake word (#1226) * fix multiple wakenet words and custom wake word * fix idf_component.yml --- main/Kconfig.projbuild | 176 ++++++++++++---------- main/application.cc | 8 +- main/audio/wake_words/custom_wake_word.cc | 2 +- main/idf_component.yml | 7 +- scripts/build_default_assets.py | 170 ++++++++++++++------- 5 files changed, 217 insertions(+), 146 deletions(-) diff --git a/main/Kconfig.projbuild b/main/Kconfig.projbuild index b8017723..02de0938 100644 --- a/main/Kconfig.projbuild +++ b/main/Kconfig.projbuild @@ -86,37 +86,37 @@ choice BOARD_TYPE help Board type. 开发板类型 config BOARD_TYPE_BREAD_COMPACT_WIFI - bool "面包板新版接线(WiFi)" + bool "Bread Compact WiFi (面包板)" depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_BREAD_COMPACT_WIFI_LCD - bool "面包板新版接线(WiFi)+ LCD" + bool "Bread Compact WiFi + LCD (面包板)" depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_BREAD_COMPACT_WIFI_CAM - bool "面包板新版接线(WiFi)+ LCD + Camera" + bool "Bread Compact WiFi + LCD + Camera (面包板)" depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_BREAD_COMPACT_ML307 - bool "面包板新版接线(ML307 AT)" + bool "Bread Compact ML307/EC801E (面包板 4G)" depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_BREAD_COMPACT_ESP32 - bool "面包板(WiFi) ESP32 DevKit" + bool "Bread Compact ESP32 DevKit (面包板)" depends on IDF_TARGET_ESP32 config BOARD_TYPE_BREAD_COMPACT_ESP32_LCD - bool "面包板(WiFi+ LCD) ESP32 DevKit" + bool "Bread Compact ESP32 DevKit + LCD (面包板)" depends on IDF_TARGET_ESP32 config BOARD_TYPE_XMINI_C3_V3 - bool "虾哥 Mini C3 V3" + bool "Xmini C3 V3" depends on IDF_TARGET_ESP32C3 config BOARD_TYPE_XMINI_C3_4G - bool "虾哥 Mini C3 4G" + bool "Xmini C3 4G" depends on IDF_TARGET_ESP32C3 config BOARD_TYPE_XMINI_C3 - bool "虾哥 Mini C3" + bool "Xmini C3" depends on IDF_TARGET_ESP32C3 config BOARD_TYPE_ESP32S3_KORVO2_V3 - bool "ESP32S3_KORVO2_V3开发板" + bool "ESP32S3 
KORVO2 V3" depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP_SPARKBOT - bool "ESP-SparkBot开发板" + bool "ESP-SparkBot" depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP_SPOT_S3 bool "ESP-Spot-S3" @@ -146,10 +146,10 @@ choice BOARD_TYPE bool "Kevin C3" depends on IDF_TARGET_ESP32C3 config BOARD_TYPE_KEVIN_SP_V3_DEV - bool "Kevin SP V3开发板" + bool "Kevin SP V3" depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_KEVIN_SP_V4_DEV - bool "Kevin SP V4开发板" + bool "Kevin SP V4" depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP32_CGC bool "ESP32 CGC" @@ -158,13 +158,13 @@ choice BOARD_TYPE bool "ESP32 CGC 144" depends on IDF_TARGET_ESP32 config BOARD_TYPE_KEVIN_YUYING_313LCD - bool "鱼鹰科技3.13LCD开发板" + bool "鱼鹰科技 3.13LCD" depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_LICHUANG_DEV - bool "立创·实战派ESP32-S3开发板" + bool "立创·实战派 ESP32-S3" depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_LICHUANG_C3_DEV - bool "立创·实战派ESP32-C3开发板" + bool "立创·实战派 ESP32-C3" depends on IDF_TARGET_ESP32C3 config BOARD_TYPE_DF_K10 bool "DFRobot 行空板 k10" @@ -373,7 +373,7 @@ choice BOARD_TYPE depends on IDF_TARGET_ESP32S3 depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_SURFER_C3_1_14TFT - bool "Surfer-C3-1-14TFT" + bool "Surfer-C3-1.14TFT" depends on IDF_TARGET_ESP32C3 config BOARD_TYPE_YUNLIAO_S3 bool "小智云聊-S3" @@ -384,8 +384,6 @@ choice ESP_S3_LCD_EV_Board_Version_TYPE depends on BOARD_TYPE_ESP_S3_LCD_EV_Board prompt "EV_BOARD Type" default ESP_S3_LCD_EV_Board_1p4 - help - 开发板硬件版本型号选择 config ESP_S3_LCD_EV_Board_1p4 bool "乐鑫ESP32_S3_LCD_EV_Board-MB_V1.4" config ESP_S3_LCD_EV_Board_1p5 @@ -397,13 +395,13 @@ choice DISPLAY_OLED_TYPE prompt "OLED Type" default OLED_SSD1306_128X32 help - OLED 屏幕类型选择 + OLED Monochrome Display Type config OLED_SSD1306_128X32 - bool "SSD1306, 分辨率128*32" + bool "SSD1306 128*32" config OLED_SSD1306_128X64 - bool "SSD1306, 分辨率128*64" + bool "SSD1306 128*64" config OLED_SH1106_128X64 - bool "SH1106, 分辨率128*64" + bool "SH1106 128*64" endchoice choice DISPLAY_LCD_TYPE @@ -411,37 +409,37 
@@ choice DISPLAY_LCD_TYPE prompt "LCD Type" default LCD_ST7789_240X320 help - 屏幕类型选择 + LCD Display Type config LCD_ST7789_240X320 - bool "ST7789, 分辨率240*320, IPS" + bool "ST7789 240*320, IPS" config LCD_ST7789_240X320_NO_IPS - bool "ST7789, 分辨率240*320, 非IPS" + bool "ST7789 240*320, Non-IPS" config LCD_ST7789_170X320 - bool "ST7789, 分辨率170*320" + bool "ST7789 170*320" config LCD_ST7789_172X320 - bool "ST7789, 分辨率172*320" + bool "ST7789 172*320" config LCD_ST7789_240X280 - bool "ST7789, 分辨率240*280" + bool "ST7789 240*280" config LCD_ST7789_240X240 - bool "ST7789, 分辨率240*240" + bool "ST7789 240*240" config LCD_ST7789_240X240_7PIN - bool "ST7789, 分辨率240*240, 7PIN" + bool "ST7789 240*240, 7PIN" config LCD_ST7789_240X135 - bool "ST7789, 分辨率240*135" + bool "ST7789 240*135" config LCD_ST7735_128X160 - bool "ST7735, 分辨率128*160" + bool "ST7735 128*160" config LCD_ST7735_128X128 - bool "ST7735, 分辨率128*128" + bool "ST7735 128*128" config LCD_ST7796_320X480 - bool "ST7796, 分辨率320*480 IPS" + bool "ST7796 320*480 IPS" config LCD_ST7796_320X480_NO_IPS - bool "ST7796, 分辨率320*480, 非IPS" + bool "ST7796 320*480, Non-IPS" config LCD_ILI9341_240X320 - bool "ILI9341, 分辨率240*320" + bool "ILI9341 240*320" config LCD_ILI9341_240X320_NO_IPS - bool "ILI9341, 分辨率240*320, 非IPS" + bool "ILI9341 240*320, Non-IPS" config LCD_GC9A01_240X240 - bool "GC9A01, 分辨率240*240, 圆屏" + bool "GC9A01 240*240 Circle" config LCD_TYPE_800_1280_10_1_INCH bool "Waveshare 101M-8001280-IPS-CT-K Display" config LCD_TYPE_800_1280_10_1_INCH_A @@ -451,7 +449,7 @@ choice DISPLAY_LCD_TYPE config LCD_TYPE_720_720_4_INCH bool "Waveshare ESP32-P4-WIFI6-Touch-LCD-4C with 720*720 4inch round display" config LCD_CUSTOM - bool "自定义屏幕参数" + bool "Custom LCD (自定义屏幕参数)" endchoice choice DISPLAY_ESP32S3_KORVO2_V3 @@ -459,11 +457,11 @@ choice DISPLAY_ESP32S3_KORVO2_V3 prompt "ESP32S3_KORVO2_V3 LCD Type" default ESP32S3_KORVO2_V3_LCD_ST7789 help - 屏幕类型选择 + LCD Display Type config ESP32S3_KORVO2_V3_LCD_ST7789 - bool "ST7789, 分辨率240*280" + 
bool "ST7789 240*280" config ESP32S3_KORVO2_V3_LCD_ILI9341 - bool "ILI9341, 分辨率240*320" + bool "ILI9341 240*320" endchoice choice DISPLAY_ESP32S3_AUDIO_BOARD @@ -471,11 +469,11 @@ choice DISPLAY_ESP32S3_AUDIO_BOARD prompt "ESP32S3_AUDIO_BOARD LCD Type" default AUDIO_BOARD_LCD_JD9853 help - 屏幕类型选择 + LCD Display Type config AUDIO_BOARD_LCD_JD9853 - bool "JD9853, 分辨率320*172" + bool "JD9853 320*172" config AUDIO_BOARD_LCD_ST7789 - bool "ST7789, 分辨率240*320" + bool "ST7789 240*320" endchoice choice DISPLAY_STYLE @@ -495,40 +493,51 @@ choice DISPLAY_STYLE depends on BOARD_TYPE_ESP_BOX_3 || BOARD_TYPE_ECHOEAR endchoice -config USE_ESP_WAKE_WORD - bool "Enable Wake Word Detection (without AFE)" - default n - depends on IDF_TARGET_ESP32C3 || IDF_TARGET_ESP32C5 || IDF_TARGET_ESP32C6 || (IDF_TARGET_ESP32 && SPIRAM) +choice WAKE_WORD_TYPE + prompt "Wake Word Implementation Type" + default USE_AFE_WAKE_WORD if (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM + default WAKE_WORD_DISABLED help - 支持 ESP32 C3、ESP32 C5 与 ESP32 C6,增加ESP32支持(需要开启PSRAM) + Choose the type of wake word implementation to use -config USE_AFE_WAKE_WORD - bool "Enable Wake Word Detection (AFE)" - default y - depends on (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM - help - 需要 ESP32 S3 与 PSRAM 支持 + config WAKE_WORD_DISABLED + bool "Disabled" + help + Disable wake word detection + + config USE_ESP_WAKE_WORD + bool "Wakenet model without AFE" + depends on IDF_TARGET_ESP32C3 || IDF_TARGET_ESP32C5 || IDF_TARGET_ESP32C6 || (IDF_TARGET_ESP32 && SPIRAM) + help + Supports ESP32 C3, ESP32 C5 and ESP32 C6, and ESP32 with PSRAM + + config USE_AFE_WAKE_WORD + bool "Wakenet model with AFE" + depends on (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM + help + Support AEC if available, requires ESP32 S3 and PSRAM + + config USE_CUSTOM_WAKE_WORD + bool "Multinet model (Custom Wake Word)" + depends on (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM + help + Requires ESP32 S3 and PSRAM + +endchoice -config 
USE_CUSTOM_WAKE_WORD - bool "Enable Custom Wake Word Detection" - default n - depends on (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM && (!USE_AFE_WAKE_WORD) - help - 需要 ESP32 S3 与 PSRAM 支持 - config CUSTOM_WAKE_WORD string "Custom Wake Word" default "xiao tu dou" depends on USE_CUSTOM_WAKE_WORD help - 自定义唤醒词,中文用拼音表示,每个字之间用空格隔开 + Custom Wake Word, use pinyin for Chinese, separated by spaces config CUSTOM_WAKE_WORD_DISPLAY string "Custom Wake Word Display" default "小土豆" depends on USE_CUSTOM_WAKE_WORD help - 唤醒后发送给服务器的问候语 + Greeting sent to the server after wake word detection config CUSTOM_WAKE_WORD_THRESHOLD int "Custom Wake Word Threshold (%)" @@ -536,14 +545,21 @@ config CUSTOM_WAKE_WORD_THRESHOLD range 1 99 depends on USE_CUSTOM_WAKE_WORD help - 自定义唤醒词阈值,范围1-99,越小越敏感,默认10 + Custom Wake Word Threshold, range 1-99, the smaller the more sensitive, default 20 +config SEND_WAKE_WORD_DATA + bool "Send Wake Word Data" + default y + depends on USE_AFE_WAKE_WORD || USE_CUSTOM_WAKE_WORD + help + Send wake word data to the server as the first message of the conversation and wait for response + config USE_AUDIO_PROCESSOR bool "Enable Audio Noise Reduction" default y depends on (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM help - 需要 ESP32 S3 与 PSRAM 支持 + Requires ESP32 S3 and PSRAM config USE_DEVICE_AEC bool "Enable Device-Side AEC" @@ -554,46 +570,46 @@ config USE_DEVICE_AEC || BOARD_TYPE_ESP32P4_WIFI6_Touch_LCD_XC || BOARD_TYPE_ESP_S3_LCD_EV_Board_2 || BOARD_TYPE_YUNLIAO_S3 \ || BOARD_TYPE_ECHOEAR || BOARD_TYPE_ESP32S3_Touch_LCD_3_49) help - 因为性能不够,不建议和微信聊天界面风格同时开启 + To work properly, device-side AEC requires a clean output reference path from the speaker signal and physical acoustic isolation between the microphone and speaker. 
config USE_SERVER_AEC bool "Enable Server-Side AEC (Unstable)" default n depends on USE_AUDIO_PROCESSOR help - 启用服务器端 AEC,需要服务器支持 + To work properly, server-side AEC requires server support config USE_AUDIO_DEBUGGER bool "Enable Audio Debugger" default n help - 启用音频调试功能,通过UDP发送音频数据 - -config USE_ACOUSTIC_WIFI_PROVISIONING - bool "Enable Acoustic WiFi Provisioning" - default n - help - 启用声波配网功能,使用音频信号传输 WiFi 配置数据 + Enable audio debugger, send audio data through UDP to the host machine config AUDIO_DEBUG_UDP_SERVER string "Audio Debug UDP Server Address" default "192.168.2.100:8000" depends on USE_AUDIO_DEBUGGER help - UDP服务器地址,格式: IP:PORT,用于接收音频调试数据 + UDP server address, format: IP:PORT, used to receive audio debugging data + +config USE_ACOUSTIC_WIFI_PROVISIONING + bool "Enable Acoustic WiFi Provisioning" + default n + help + Enable acoustic WiFi provisioning, use audio signal to transmit WiFi configuration data config RECEIVE_CUSTOM_MESSAGE bool "Enable Custom Message Reception" default n help - 启用接收自定义消息功能,允许设备接收来自服务器的自定义消息(最好通过 MQTT 协议) + Enable custom message reception, allow the device to receive custom messages from the server (preferably through the MQTT protocol) choice I2S_TYPE_TAIJIPI_S3 depends on BOARD_TYPE_ESP32S3_Taiji_Pi prompt "taiji-pi-S3 I2S Type" default TAIJIPAI_I2S_TYPE_STD help - I2S 类型选择 + I2S Type config TAIJIPAI_I2S_TYPE_STD bool "I2S Type STD" config TAIJIPAI_I2S_TYPE_PDM diff --git a/main/application.cc b/main/application.cc index 2cb3db0b..5a51c742 100644 --- a/main/application.cc +++ b/main/application.cc @@ -630,7 +630,7 @@ void Application::OnWakeWordDetected() { auto wake_word = audio_service_.GetLastWakeWord(); ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str()); -#if CONFIG_USE_AFE_WAKE_WORD || CONFIG_USE_CUSTOM_WAKE_WORD +#if CONFIG_SEND_WAKE_WORD_DATA // Encode and send the wake word data to the server while (auto packet = audio_service_.PopWakeWordPacket()) { protocol_->SendAudio(std::move(packet)); @@ -711,11 +711,7 @@ 
void Application::SetDeviceState(DeviceState state) { if (listening_mode_ != kListeningModeRealtime) { audio_service_.EnableVoiceProcessing(false); // Only AFE wake word can be detected in speaking mode -#if CONFIG_USE_AFE_WAKE_WORD - audio_service_.EnableWakeWordDetection(true); -#else - audio_service_.EnableWakeWordDetection(false); -#endif + audio_service_.EnableWakeWordDetection(audio_service_.IsAfeWakeWord()); } audio_service_.ResetDecoder(); break; diff --git a/main/audio/wake_words/custom_wake_word.cc b/main/audio/wake_words/custom_wake_word.cc index f048af17..9fa66588 100644 --- a/main/audio/wake_words/custom_wake_word.cc +++ b/main/audio/wake_words/custom_wake_word.cc @@ -91,7 +91,7 @@ bool CustomWakeWord::Initialize(AudioCodec* codec, srmodel_list_t* models_list) if (models_list == nullptr) { language_ = "cn"; models_ = esp_srmodel_init("model"); -#if CONFIG_CUSTOM_WAKE_WORD +#ifdef CONFIG_CUSTOM_WAKE_WORD threshold_ = CONFIG_CUSTOM_WAKE_WORD_THRESHOLD / 100.0f; commands_.push_back({CONFIG_CUSTOM_WAKE_WORD, CONFIG_CUSTOM_WAKE_WORD_DISPLAY, "wake"}); #endif diff --git a/main/idf_component.yml b/main/idf_component.yml index 9d7d762f..97424c9d 100644 --- a/main/idf_component.yml +++ b/main/idf_component.yml @@ -4,10 +4,13 @@ dependencies: espressif/esp_lcd_ili9341: ==1.2.0 espressif/esp_lcd_gc9a01: ==2.0.1 espressif/esp_lcd_st77916: ^1.0.1 - espressif/esp_lcd_st7701: "^1.1.*" espressif/esp_lcd_axs15231b: ^1.0.0 + espressif/esp_lcd_st7701: + version: ^1.1.4 + rules: + - if: target in [esp32s3, esp32p4] espressif/esp_lcd_st7796: - version: 1.3.4 + version: 1.3.5 rules: - if: target not in [esp32c3] espressif/esp_lcd_spd2010: ==1.0.2 diff --git a/scripts/build_default_assets.py b/scripts/build_default_assets.py index 36c7c107..37e4e01d 100755 --- a/scripts/build_default_assets.py +++ b/scripts/build_default_assets.py @@ -17,8 +17,6 @@ import shutil import sys import json import struct -import math -from pathlib import Path from datetime import datetime @@ 
-156,9 +154,9 @@ def copy_directory(src, dst): return False -def process_sr_models(wakenet_model_dir, multinet_model_dirs, build_dir, assets_dir): +def process_sr_models(wakenet_model_dirs, multinet_model_dirs, build_dir, assets_dir): """Process SR models (wakenet and multinet) and generate srmodels.bin""" - if not wakenet_model_dir and not multinet_model_dirs: + if not wakenet_model_dirs and not multinet_model_dirs: return None # Create SR models build directory @@ -169,13 +167,14 @@ def process_sr_models(wakenet_model_dir, multinet_model_dirs, build_dir, assets_ models_processed = 0 - # Copy wakenet model if available - if wakenet_model_dir: - wakenet_name = os.path.basename(wakenet_model_dir) - wakenet_dst = os.path.join(sr_models_build_dir, wakenet_name) - if copy_directory(wakenet_model_dir, wakenet_dst): - models_processed += 1 - print(f"Added wakenet model: {wakenet_name}") + # Copy wakenet models if available + if wakenet_model_dirs: + for wakenet_model_dir in wakenet_model_dirs: + wakenet_name = os.path.basename(wakenet_model_dir) + wakenet_dst = os.path.join(sr_models_build_dir, wakenet_name) + if copy_directory(wakenet_model_dir, wakenet_dst): + models_processed += 1 + print(f"Added wakenet model: {wakenet_name}") # Copy multinet models if available if multinet_model_dirs: @@ -203,11 +202,6 @@ def process_sr_models(wakenet_model_dir, multinet_model_dirs, build_dir, assets_ return None -def process_wakenet_model(wakenet_model_dir, build_dir, assets_dir): - """Process wakenet_model parameter (legacy compatibility function)""" - return process_sr_models(wakenet_model_dir, None, build_dir, assets_dir) - - def process_text_font(text_font_file, assets_dir): """Process text_font parameter""" if not text_font_file: @@ -440,12 +434,12 @@ def pack_assets_simple(target_path, include_path, out_file, assets_path, max_nam def read_wakenet_from_sdkconfig(sdkconfig_path): """ - Read wakenet model from sdkconfig (based on movemodel.py logic) - Returns the wakenet model 
name or None if no wakenet is configured + Read wakenet models from sdkconfig (based on movemodel.py logic) + Returns a list of wakenet model names """ if not os.path.exists(sdkconfig_path): print(f"Warning: sdkconfig file not found: {sdkconfig_path}") - return None + return [] models = [] with io.open(sdkconfig_path, "r") as f: @@ -461,8 +455,7 @@ def read_wakenet_from_sdkconfig(sdkconfig_path): model_name = label.split("_SR_WN_")[-1].lower() models.append(model_name) - # Return the first model found, or None if no models - return models[0] if models else None + return models def read_multinet_from_sdkconfig(sdkconfig_path): @@ -514,6 +507,46 @@ def read_multinet_from_sdkconfig(sdkconfig_path): return models +def read_wake_word_type_from_sdkconfig(sdkconfig_path): + """ + Read wake word type configuration from sdkconfig + Returns a dict with wake word type info + """ + if not os.path.exists(sdkconfig_path): + print(f"Warning: sdkconfig file not found: {sdkconfig_path}") + return { + 'use_esp_wake_word': False, + 'use_afe_wake_word': False, + 'use_custom_wake_word': False, + 'wake_word_disabled': True + } + + config_values = { + 'use_esp_wake_word': False, + 'use_afe_wake_word': False, + 'use_custom_wake_word': False, + 'wake_word_disabled': False + } + + with io.open(sdkconfig_path, "r") as f: + for line in f: + line = line.strip("\n") + if line.startswith('#'): + continue + + # Check for wake word type configuration + if 'CONFIG_USE_ESP_WAKE_WORD=y' in line: + config_values['use_esp_wake_word'] = True + elif 'CONFIG_USE_AFE_WAKE_WORD=y' in line: + config_values['use_afe_wake_word'] = True + elif 'CONFIG_USE_CUSTOM_WAKE_WORD=y' in line: + config_values['use_custom_wake_word'] = True + elif 'CONFIG_WAKE_WORD_DISABLED=y' in line: + config_values['wake_word_disabled'] = True + + return config_values + + def read_custom_wake_word_from_sdkconfig(sdkconfig_path): """ Read custom wake word configuration from sdkconfig @@ -591,19 +624,23 @@ def 
get_language_from_multinet_models(multinet_models): return 'cn' # Default to Chinese -def get_wakenet_model_path(model_name, esp_sr_model_path): +def get_wakenet_model_paths(model_names, esp_sr_model_path): """ - Get the full path to the wakenet model directory + Get the full paths to the wakenet model directories + Returns a list of valid model paths """ - if not model_name: - return None + if not model_names: + return [] - wakenet_model_path = os.path.join(esp_sr_model_path, 'wakenet_model', model_name) - if os.path.exists(wakenet_model_path): - return wakenet_model_path - else: - print(f"Warning: Wakenet model directory not found: {wakenet_model_path}") - return None + valid_paths = [] + for model_name in model_names: + wakenet_model_path = os.path.join(esp_sr_model_path, 'wakenet_model', model_name) + if os.path.exists(wakenet_model_path): + valid_paths.append(wakenet_model_path) + else: + print(f"Warning: Wakenet model directory not found: {wakenet_model_path}") + + return valid_paths def get_multinet_model_paths(model_names, esp_sr_model_path): @@ -661,7 +698,7 @@ def get_emoji_collection_path(default_emoji_collection, xiaozhi_fonts_path): return None -def build_assets_integrated(wakenet_model_path, multinet_model_paths, text_font_path, emoji_collection_path, extra_files_path, output_path, multinet_model_info=None): +def build_assets_integrated(wakenet_model_paths, multinet_model_paths, text_font_path, emoji_collection_path, extra_files_path, output_path, multinet_model_info=None): """ Build assets using integrated functions (no external dependencies) """ @@ -679,7 +716,7 @@ def build_assets_integrated(wakenet_model_path, multinet_model_paths, text_font_ print("Starting to build assets...") # Process each component - srmodels = process_sr_models(wakenet_model_path, multinet_model_paths, temp_build_dir, assets_dir) if (wakenet_model_path or multinet_model_paths) else None + srmodels = process_sr_models(wakenet_model_paths, multinet_model_paths, temp_build_dir, 
assets_dir) if (wakenet_model_paths or multinet_model_paths) else None text_font = process_text_font(text_font_path, assets_dir) if text_font_path else None emoji_collection = process_emoji_collection(emoji_collection_path, assets_dir) if emoji_collection_path else None extra_files = process_extra_files(extra_files_path, assets_dir) if extra_files_path else None @@ -734,19 +771,17 @@ def main(): args = parser.parse_args() - # Get script directory (not needed anymore but keep for future use) - script_dir = os.path.dirname(os.path.abspath(__file__)) - # Set default paths if not provided - if not args.esp_sr_model_path: - # Default ESP-SR model path relative to project root - project_root = os.path.dirname(os.path.dirname(script_dir)) - args.esp_sr_model_path = os.path.join(project_root, "managed_components", "espressif__esp-sr", "model") - - if not args.xiaozhi_fonts_path: - # Default xiaozhi-fonts path relative to project root - project_root = os.path.dirname(os.path.dirname(script_dir)) - args.xiaozhi_fonts_path = os.path.join(project_root, "managed_components", "78__xiaozhi-fonts") + if not args.esp_sr_model_path or not args.xiaozhi_fonts_path: + # Calculate project root from script location + script_dir = os.path.dirname(os.path.abspath(__file__)) + project_root = os.path.dirname(script_dir) + + if not args.esp_sr_model_path: + args.esp_sr_model_path = os.path.join(project_root, "managed_components", "espressif__esp-sr", "model") + + if not args.xiaozhi_fonts_path: + args.xiaozhi_fonts_path = os.path.join(project_root, "components", "xiaozhi-fonts") print("Building default assets...") print(f" sdkconfig: {args.sdkconfig}") @@ -754,19 +789,40 @@ def main(): print(f" emoji_collection: {args.emoji_collection}") print(f" output: {args.output}") + # Read wake word type configuration from sdkconfig + wake_word_config = read_wake_word_type_from_sdkconfig(args.sdkconfig) + # Read SR models from sdkconfig - wakenet_model_name = read_wakenet_from_sdkconfig(args.sdkconfig) 
+ wakenet_model_names = read_wakenet_from_sdkconfig(args.sdkconfig) multinet_model_names = read_multinet_from_sdkconfig(args.sdkconfig) - # Get model paths - wakenet_model_path = get_wakenet_model_path(wakenet_model_name, args.esp_sr_model_path) - multinet_model_paths = get_multinet_model_paths(multinet_model_names, args.esp_sr_model_path) + # Apply wake word logic to decide which models to package + wakenet_model_paths = [] + multinet_model_paths = [] - # Print model information - if wakenet_model_name: - print(f" wakenet model: {wakenet_model_name}") - if multinet_model_names: - print(f" multinet models: {', '.join(multinet_model_names)}") + # 1. Only package wakenet models if USE_ESP_WAKE_WORD=y or USE_AFE_WAKE_WORD=y + if wake_word_config['use_esp_wake_word'] or wake_word_config['use_afe_wake_word']: + wakenet_model_paths = get_wakenet_model_paths(wakenet_model_names, args.esp_sr_model_path) + elif wakenet_model_names: + print(f" Note: Found wakenet models {wakenet_model_names} but wake word type is not ESP/AFE, skipping") + + # 2. Error check: if USE_CUSTOM_WAKE_WORD=y but no multinet models selected, report error + if wake_word_config['use_custom_wake_word'] and not multinet_model_names: + print("Error: USE_CUSTOM_WAKE_WORD is enabled but no multinet models are selected in sdkconfig") + print("Please select appropriate CONFIG_SR_MN_* options in menuconfig, or disable USE_CUSTOM_WAKE_WORD") + sys.exit(1) + + # 3. 
Only package multinet models if USE_CUSTOM_WAKE_WORD=y + if wake_word_config['use_custom_wake_word']: + multinet_model_paths = get_multinet_model_paths(multinet_model_names, args.esp_sr_model_path) + elif multinet_model_names: + print(f" Note: Found multinet models {multinet_model_names} but USE_CUSTOM_WAKE_WORD is disabled, skipping") + + # Print model information (only for models that will actually be packaged) + if wakenet_model_paths: + print(f" wakenet models: {', '.join(wakenet_model_names)} (will be packaged)") + if multinet_model_paths: + print(f" multinet models: {', '.join(multinet_model_names)} (will be packaged)") # Get text font path if needed text_font_path = get_text_font_path(args.builtin_text_font, args.xiaozhi_fonts_path) @@ -781,7 +837,7 @@ def main(): custom_wake_word_config = read_custom_wake_word_from_sdkconfig(args.sdkconfig) multinet_model_info = None - if custom_wake_word_config and multinet_model_names: + if custom_wake_word_config and multinet_model_paths: # Determine language from multinet models language = get_language_from_multinet_models(multinet_model_names) @@ -803,7 +859,7 @@ def main(): print(f" wake word threshold: {custom_wake_word_config['threshold']}") # Check if we have anything to build - if not wakenet_model_path and not multinet_model_paths and not text_font_path and not emoji_collection_path and not extra_files_path and not multinet_model_info: + if not wakenet_model_paths and not multinet_model_paths and not text_font_path and not emoji_collection_path and not extra_files_path and not multinet_model_info: print("Warning: No assets to build (no SR models, text font, emoji collection, extra files, or custom wake word)") # Create an empty assets.bin file os.makedirs(os.path.dirname(args.output), exist_ok=True) @@ -813,7 +869,7 @@ def main(): return # Build the assets - success = build_assets_integrated(wakenet_model_path, multinet_model_paths, text_font_path, emoji_collection_path, + success = 
build_assets_integrated(wakenet_model_paths, multinet_model_paths, text_font_path, emoji_collection_path, extra_files_path, args.output, multinet_model_info) if not success: