diff --git a/.gitignore b/.gitignore index ec2ead75..bdb6e847 100644 --- a/.gitignore +++ b/.gitignore @@ -10,5 +10,6 @@ dependencies.lock .env releases/ main/assets/lang_config.h +main/mmap_generate_emoji.h .DS_Store .cache \ No newline at end of file diff --git a/main/CMakeLists.txt b/main/CMakeLists.txt index 773bd1ab..414b63b7 100644 --- a/main/CMakeLists.txt +++ b/main/CMakeLists.txt @@ -194,13 +194,14 @@ list(APPEND SOURCES ${BOARD_SOURCES}) if(CONFIG_USE_AUDIO_PROCESSOR) list(APPEND SOURCES "audio_processing/afe_audio_processor.cc") else() - list(APPEND SOURCES "audio_processing/dummy_audio_processor.cc") + list(APPEND SOURCES "audio_processing/no_audio_processor.cc") endif() -if(CONFIG_USE_WAKE_WORD_DETECT) - list(APPEND SOURCES "audio_processing/wake_word_detect.cc") -endif() -if(CONFIG_USE_WAKE_WORD_DETECT_NO_AFE) - list(APPEND SOURCES "audio_processing/wake_word_no_afe.cc") +if(CONFIG_USE_AFE_WAKE_WORD) + list(APPEND SOURCES "audio_processing/afe_wake_word.cc") +elseif(CONFIG_USE_ESP_WAKE_WORD) + list(APPEND SOURCES "audio_processing/esp_wake_word.cc") +else() + list(APPEND SOURCES "audio_processing/no_wake_word.cc") endif() # 根据Kconfig选择语言目录 diff --git a/main/Kconfig.projbuild b/main/Kconfig.projbuild index ac973562..2cd46a73 100644 --- a/main/Kconfig.projbuild +++ b/main/Kconfig.projbuild @@ -30,152 +30,226 @@ choice BOARD_TYPE Board type. 
开发板类型 config BOARD_TYPE_BREAD_COMPACT_WIFI bool "面包板新版接线(WiFi)" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_BREAD_COMPACT_WIFI_LCD bool "面包板新版接线(WiFi)+ LCD" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_BREAD_COMPACT_ML307 bool "面包板新版接线(ML307 AT)" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_BREAD_COMPACT_ESP32 bool "面包板(WiFi) ESP32 DevKit" + depends on IDF_TARGET_ESP32 config BOARD_TYPE_BREAD_COMPACT_ESP32_LCD bool "面包板(WiFi+ LCD) ESP32 DevKit" + depends on IDF_TARGET_ESP32 config BOARD_TYPE_XMINI_C3 bool "虾哥 Mini C3" + depends on IDF_TARGET_ESP32C3 config BOARD_TYPE_ESP32S3_KORVO2_V3 bool "ESP32S3_KORVO2_V3开发板" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP_SPARKBOT bool "ESP-SparkBot开发板" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP_SPOT_S3 bool "ESP-Spot-S3" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP_HI bool "ESP-HI" + depends on IDF_TARGET_ESP32C3 config BOARD_TYPE_ESP_BOX_3 bool "ESP BOX 3" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP_BOX bool "ESP BOX" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP_BOX_LITE bool "ESP BOX Lite" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_KEVIN_BOX_1 bool "Kevin Box 1" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_KEVIN_BOX_2 bool "Kevin Box 2" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_KEVIN_C3 bool "Kevin C3" + depends on IDF_TARGET_ESP32C3 config BOARD_TYPE_KEVIN_SP_V3_DEV bool "Kevin SP V3开发板" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_KEVIN_SP_V4_DEV bool "Kevin SP V4开发板" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP32_CGC bool "ESP32 CGC" + depends on IDF_TARGET_ESP32 config BOARD_TYPE_KEVIN_YUYING_313LCD bool "鱼鹰科技3.13LCD开发板" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_LICHUANG_DEV bool "立创·实战派ESP32-S3开发板" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_LICHUANG_C3_DEV bool "立创·实战派ESP32-C3开发板" + depends on IDF_TARGET_ESP32C3 config BOARD_TYPE_DF_K10 bool "DFRobot 行空板 k10" + depends on IDF_TARGET_ESP32S3 config 
BOARD_TYPE_DF_S3_AI_CAM bool "DFRobot ESP32-S3 AI智能摄像头模块" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_MAGICLICK_2P4 bool "神奇按钮 Magiclick_2.4" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_MAGICLICK_2P5 bool "神奇按钮 Magiclick_2.5" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_MAGICLICK_C3 bool "神奇按钮 Magiclick_C3" + depends on IDF_TARGET_ESP32C3 config BOARD_TYPE_MAGICLICK_C3_V2 bool "神奇按钮 Magiclick_C3_v2" + depends on IDF_TARGET_ESP32C3 config BOARD_TYPE_M5STACK_CORE_S3 bool "M5Stack CoreS3" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_M5STACK_CORE_TAB5 bool "M5Stack Tab5" + depends on IDF_TARGET_ESP32P4 config BOARD_TYPE_ATOMS3_ECHO_BASE bool "AtomS3 + Echo Base" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ATOMS3R_ECHO_BASE bool "AtomS3R + Echo Base" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ATOMS3R_CAM_M12_ECHO_BASE bool "AtomS3R CAM/M12 + Echo Base" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ATOMMATRIX_ECHO_BASE bool "AtomMatrix + Echo Base" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP32S3_Touch_AMOLED_1_8 bool "Waveshare ESP32-S3-Touch-AMOLED-1.8" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP32S3_Touch_AMOLED_1_75 bool "Waveshare ESP32-S3-Touch-AMOLED-1.75" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP32S3_Touch_LCD_1_85C bool "Waveshare ESP32-S3-Touch-LCD-1.85C" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP32S3_Touch_LCD_1_85 bool "Waveshare ESP32-S3-Touch-LCD-1.85" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP32S3_Touch_LCD_1_46 bool "Waveshare ESP32-S3-Touch-LCD-1.46" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP32S3_Touch_LCD_3_5 bool "Waveshare ESP32-S3-Touch-LCD-3.5" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP32P4_NANO bool "Waveshare ESP32-P4-NANO" + depends on IDF_TARGET_ESP32P4 config BOARD_TYPE_ESP32P4_WIFI6_Touch_LCD_4B bool "Waveshare ESP32-P4-WIFI6-Touch-LCD-4B" + depends on IDF_TARGET_ESP32P4 config BOARD_TYPE_ESP32P4_WIFI6_Touch_LCD_XC bool "Waveshare 
ESP32-P4-WIFI6-Touch-LCD-3.4C or ESP32-P4-WIFI6-Touch-LCD-4C" + depends on IDF_TARGET_ESP32P4 config BOARD_TYPE_TUDOUZI bool "土豆子" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_LILYGO_T_CIRCLE_S3 bool "LILYGO T-Circle-S3" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_LILYGO_T_CAMERAPLUS_S3_V1_0_V1_1 bool "LILYGO T-CameraPlus-S3_V1_0_V1_1" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_LILYGO_T_CAMERAPLUS_S3_V1_2 bool "LILYGO T-CameraPlus-S3_V1_2" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_LILYGO_T_DISPLAY_S3_PRO_MVSRLORA bool "LILYGO T-Display-S3-Pro-MVSRLora" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_LILYGO_T_DISPLAY_S3_PRO_MVSRLORA_NO_BATTERY bool "LILYGO T-Display-S3-Pro-MVSRLora_No_Battery" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_MOVECALL_MOJI_ESP32S3 bool "Movecall Moji 小智AI衍生版" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_MOVECALL_CUICAN_ESP32S3 bool "Movecall CuiCan 璀璨·AI吊坠" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ATK_DNESP32S3 bool "正点原子DNESP32S3开发板" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ATK_DNESP32S3_BOX bool "正点原子DNESP32S3-BOX" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ATK_DNESP32S3_BOX0 bool "正点原子DNESP32S3-BOX0" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ATK_DNESP32S3M_WIFI bool "正点原子DNESP32S3M-WIFI" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ATK_DNESP32S3M_4G bool "正点原子DNESP32S3M-4G" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_DU_CHATX bool "嘟嘟开发板CHATX(wifi)" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP32S3_Taiji_Pi bool "太极小派esp32s3" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_XINGZHI_Cube_0_85TFT_WIFI bool "无名科技星智0.85(WIFI)" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_XINGZHI_Cube_0_85TFT_ML307 bool "无名科技星智0.85(ML307)" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_XINGZHI_Cube_0_96OLED_WIFI bool "无名科技星智0.96(WIFI)" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_XINGZHI_Cube_0_96OLED_ML307 bool "无名科技星智0.96(ML307)" + depends on 
IDF_TARGET_ESP32S3 config BOARD_TYPE_XINGZHI_Cube_1_54TFT_WIFI bool "无名科技星智1.54(WIFI)" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_XINGZHI_Cube_1_54TFT_ML307 bool "无名科技星智1.54(ML307)" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_SENSECAP_WATCHER bool "SenseCAP Watcher" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_DOIT_S3_AIBOX bool "四博智联AI陪伴盒子" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_MIXGO_NOVA bool "元控·青春" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_GENJUTECH_S3_1_54TFT bool "亘具科技1.54(s3)" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP_S3_LCD_EV_Board bool "乐鑫ESP S3 LCD EV Board开发板" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ZHENGCHEN_1_54TFT_WIFI bool "征辰科技1.54(WIFI)" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ZHENGCHEN_1_54TFT_ML307 bool "征辰科技1.54(ML307)" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_MINSI_K08_DUAL bool "敏思科技K08(DUAL)" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP32_S3_1_54_MUMA bool "Spotpear ESP32-S3-1.54-MUMA" + depends on IDF_TARGET_ESP32S3 config BOARD_TYPE_ESP32_S3_1_28_BOX bool "Spotpear ESP32-S3-1.28-BOX" + depends on IDF_TARGET_ESP32S3 endchoice choice ESP_S3_LCD_EV_Board_Version_TYPE @@ -270,24 +344,26 @@ config USE_WECHAT_MESSAGE_STYLE help 使用微信聊天界面风格 -config USE_WAKE_WORD_DETECT_NO_AFE +config USE_ESP_WAKE_WORD bool "Enable Wake Word Detection (without AFE)" default y depends on IDF_TARGET_ESP32C3 || IDF_TARGET_ESP32C5 - -config USE_WAKE_WORD_DETECT - bool "Enable Wake Word Detection" - default y - depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4 && SPIRAM help - 需要 ESP32 S3 与 AFE 支持 + 支持 ESP32 C3 与 ESP32 C5 + +config USE_AFE_WAKE_WORD + bool "Enable Wake Word Detection (AFE)" + default n + depends on (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM + help + 需要 ESP32 S3 与 PSRAM 支持 config USE_AUDIO_PROCESSOR bool "Enable Audio Noise Reduction" default y - depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4 && SPIRAM + depends on (IDF_TARGET_ESP32S3 || 
IDF_TARGET_ESP32P4) && SPIRAM help - 需要 ESP32 S3 与 AFE 支持 + 需要 ESP32 S3 与 PSRAM 支持 config USE_DEVICE_AEC bool "Enable Device-Side AEC" @@ -297,7 +373,7 @@ config USE_DEVICE_AEC 因为性能不够,不建议和微信聊天界面风格同时开启 config USE_SERVER_AEC - bool "Enable Server-Side AEC" + bool "Enable Server-Side AEC (Unstable)" default n depends on USE_AUDIO_PROCESSOR help diff --git a/main/application.cc b/main/application.cc index 264185e3..c52b155d 100644 --- a/main/application.cc +++ b/main/application.cc @@ -14,7 +14,15 @@ #if CONFIG_USE_AUDIO_PROCESSOR #include "afe_audio_processor.h" #else -#include "dummy_audio_processor.h" +#include "no_audio_processor.h" +#endif + +#if CONFIG_USE_AFE_WAKE_WORD +#include "afe_wake_word.h" +#elif CONFIG_USE_ESP_WAKE_WORD +#include "esp_wake_word.h" +#else +#include "no_wake_word.h" #endif #include @@ -55,7 +63,15 @@ Application::Application() { #if CONFIG_USE_AUDIO_PROCESSOR audio_processor_ = std::make_unique(); #else - audio_processor_ = std::make_unique(); + audio_processor_ = std::make_unique(); +#endif + +#if CONFIG_USE_AFE_WAKE_WORD + wake_word_ = std::make_unique(); +#elif CONFIG_USE_ESP_WAKE_WORD + wake_word_ = std::make_unique(); +#else + wake_word_ = std::make_unique(); #endif esp_timer_create_args_t clock_timer_args = { @@ -129,9 +145,7 @@ void Application::CheckNewVersion() { auto& board = Board::GetInstance(); board.SetPowerSaveMode(false); -#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE - wake_word_detect_.StopDetection(); -#endif + wake_word_->StopDetection(); // 预先关闭音频输出,避免升级过程有音频操作 auto codec = board.GetAudioCodec(); codec->EnableInput(false); @@ -256,8 +270,6 @@ void Application::PlaySound(const std::string_view& sound) { } background_task_->WaitForCompletion(); - // The assets are encoded at 16000Hz, 60ms frame duration - SetDecodeSampleRate(16000, 60); const char* data = sound.data(); size_t size = sound.size(); for (const char* p = data; p < data + size; ) { @@ -266,6 +278,8 @@ void Application::PlaySound(const 
std::string_view& sound) { auto payload_size = ntohs(p3->payload_size); AudioStreamPacket packet; + packet.sample_rate = 16000; + packet.frame_duration = 60; packet.payload.resize(payload_size); memcpy(packet.payload.data(), p3->payload, payload_size); p += payload_size; @@ -432,7 +446,7 @@ void Application::Start() { }); protocol_->OnIncomingAudio([this](AudioStreamPacket&& packet) { std::lock_guard lock(mutex_); - if (audio_decode_queue_.size() < MAX_AUDIO_PACKETS_IN_QUEUE) { + if (device_state_ == kDeviceStateSpeaking && audio_decode_queue_.size() < MAX_AUDIO_PACKETS_IN_QUEUE) { audio_decode_queue_.emplace_back(std::move(packet)); } }); @@ -442,7 +456,6 @@ void Application::Start() { ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion", protocol_->server_sample_rate(), codec->output_sample_rate()); } - SetDecodeSampleRate(protocol_->server_sample_rate(), protocol_->server_frame_duration()); #if CONFIG_IOT_PROTOCOL_XIAOZHI auto& thing_manager = iot::ThingManager::GetInstance(); @@ -600,28 +613,40 @@ void Application::Start() { } }); -#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE - wake_word_detect_.Initialize(codec); -#ifdef CONFIG_USE_WAKE_WORD_DETECT - wake_word_detect_.OnWakeWordDetected([this](const std::string& wake_word) { + wake_word_->Initialize(codec); + wake_word_->OnWakeWordDetected([this](const std::string& wake_word) { - Schedule([this, &wake_word]() { + Schedule([this, wake_word]() { - if (device_state_ == kDeviceStateIdle) { - SetDeviceState(kDeviceStateConnecting); - wake_word_detect_.EncodeWakeWordData(); + if (!protocol_) { + return; + } - if (!protocol_ || !protocol_->OpenAudioChannel()) { - wake_word_detect_.StartDetection(); - return; + if (device_state_ == kDeviceStateIdle) { + wake_word_->EncodeWakeWordData(); + + if (!protocol_->IsAudioChannelOpened()) { + SetDeviceState(kDeviceStateConnecting); + if (!protocol_->OpenAudioChannel()) { + wake_word_->StartDetection(); + return; + } } - + +
ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str()); +#if CONFIG_USE_AFE_WAKE_WORD AudioStreamPacket packet; // Encode and send the wake word data to the server - while (wake_word_detect_.GetWakeWordOpus(packet.payload)) { + while (wake_word_->GetWakeWordOpus(packet.payload)) { protocol_->SendAudio(packet); } // Set the chat state to wake word detected protocol_->SendWakeWordDetected(wake_word); - ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str()); +#else + // Play the pop up sound to indicate the wake word is detected + // And wait 60ms to make sure the queue has been processed by audio task + ResetDecoder(); + PlaySound(Lang::Sounds::P3_POPUP); + vTaskDelay(pdMS_TO_TICKS(60)); +#endif SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime); } else if (device_state_ == kDeviceStateSpeaking) { AbortSpeaking(kAbortReasonWakeWordDetected); @@ -630,9 +655,7 @@ void Application::Start() { } }); }); -#endif - wake_word_detect_.StartDetection(); -#endif + wake_word_->StartDetection(); // Wait for the new version check to finish xEventGroupWaitBits(event_group_, CHECK_NEW_VERSION_DONE_EVENT, pdTRUE, pdFALSE, portMAX_DELAY); @@ -751,17 +774,14 @@ void Application::OnAudioOutput() { return; } - if (device_state_ == kDeviceStateListening) { - audio_decode_queue_.clear(); - audio_decode_cv_.notify_all(); - return; - } - auto packet = std::move(audio_decode_queue_.front()); audio_decode_queue_.pop_front(); lock.unlock(); audio_decode_cv_.notify_all(); + // Synchronize the sample rate and frame duration + SetDecodeSampleRate(packet.sample_rate, packet.frame_duration); + busy_decoding_audio_ = true; background_task_->Schedule([this, codec, packet = std::move(packet)]() mutable { busy_decoding_audio_ = false; @@ -782,45 +802,48 @@ void Application::OnAudioOutput() { } codec->OutputData(pcm); #ifdef CONFIG_USE_SERVER_AEC - std::lock_guard lock(timestamp_mutex_); - timestamp_queue_.push_back(packet.timestamp); - last_output_timestamp_ 
= packet.timestamp; + std::lock_guard lock(timestamp_mutex_); + timestamp_queue_.push_back(packet.timestamp); #endif last_output_time_ = std::chrono::steady_clock::now(); }); } void Application::OnAudioInput() { -#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE - if (wake_word_detect_.IsDetectionRunning()) { + if (wake_word_->IsDetectionRunning()) { std::vector data; - int samples = wake_word_detect_.GetFeedSize(); + int samples = wake_word_->GetFeedSize(); if (samples > 0) { - ReadAudio(data, 16000, samples); - wake_word_detect_.Feed(data); - return; + if (ReadAudio(data, 16000, samples)) { + wake_word_->Feed(data); + return; + } } } -#endif if (audio_processor_->IsRunning()) { std::vector data; int samples = audio_processor_->GetFeedSize(); if (samples > 0) { - ReadAudio(data, 16000, samples); - audio_processor_->Feed(data); - return; + if (ReadAudio(data, 16000, samples)) { + audio_processor_->Feed(data); + return; + } } } vTaskDelay(pdMS_TO_TICKS(OPUS_FRAME_DURATION_MS / 2)); } -void Application::ReadAudio(std::vector& data, int sample_rate, int samples) { +bool Application::ReadAudio(std::vector& data, int sample_rate, int samples) { auto codec = Board::GetInstance().GetAudioCodec(); + if (!codec->input_enabled()) { + return false; + } + if (codec->input_sample_rate() != sample_rate) { data.resize(samples * codec->input_sample_rate() / sample_rate); if (!codec->InputData(data)) { - return; + return false; } if (codec->input_channels() == 2) { auto mic_channel = std::vector(data.size() / 2); @@ -846,9 +869,10 @@ void Application::ReadAudio(std::vector& data, int sample_rate, int sam } else { data.resize(samples); if (!codec->InputData(data)) { - return; + return false; } } + return true; } void Application::AbortSpeaking(AbortReason reason) { @@ -884,17 +908,13 @@ void Application::SetDeviceState(DeviceState state) { display->SetStatus(Lang::Strings::STANDBY); display->SetEmotion("neutral"); audio_processor_->Stop(); - -#if 
CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE - wake_word_detect_.StartDetection(); -#endif + wake_word_->StartDetection(); break; case kDeviceStateConnecting: display->SetStatus(Lang::Strings::CONNECTING); display->SetEmotion("neutral"); display->SetChatMessage("system", ""); timestamp_queue_.clear(); - last_output_timestamp_ = 0; break; case kDeviceStateListening: display->SetStatus(Lang::Strings::LISTENING); @@ -909,14 +929,14 @@ void Application::SetDeviceState(DeviceState state) { // Send the start listening command protocol_->SendStartListening(listening_mode_); if (previous_state == kDeviceStateSpeaking) { + audio_decode_queue_.clear(); + audio_decode_cv_.notify_all(); // FIXME: Wait for the speaker to empty the buffer vTaskDelay(pdMS_TO_TICKS(120)); } opus_encoder_->ResetState(); -#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE - wake_word_detect_.StopDetection(); -#endif audio_processor_->Start(); + wake_word_->StopDetection(); } break; case kDeviceStateSpeaking: @@ -924,8 +944,11 @@ void Application::SetDeviceState(DeviceState state) { if (listening_mode_ != kListeningModeRealtime) { audio_processor_->Stop(); -#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE - wake_word_detect_.StartDetection(); + // Only AFE wake word can be detected in speaking mode +#if CONFIG_USE_AFE_WAKE_WORD + wake_word_->StartDetection(); +#else + wake_word_->StopDetection(); #endif } ResetDecoder(); diff --git a/main/application.h b/main/application.h index 0e327a08..fd138447 100644 --- a/main/application.h +++ b/main/application.h @@ -21,12 +21,7 @@ #include "ota.h" #include "background_task.h" #include "audio_processor.h" - -#if CONFIG_USE_WAKE_WORD_DETECT -#include "wake_word_detect.h" -#elif CONFIG_USE_WAKE_WORD_DETECT_NO_AFE -#include "wake_word_no_afe.h" -#endif +#include "wake_word.h" #define SCHEDULE_EVENT (1 << 0) #define SEND_AUDIO_EVENT (1 << 1) @@ -83,14 +78,13 @@ public: void SendMcpMessage(const 
std::string& payload); void SetAecMode(AecMode mode); AecMode GetAecMode() const { return aec_mode_; } + BackgroundTask* GetBackgroundTask() const { return background_task_; } private: Application(); ~Application(); -#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE - WakeWordDetect wake_word_detect_; -#endif + std::unique_ptr wake_word_; std::unique_ptr audio_processor_; Ota ota_; std::mutex mutex_; @@ -119,7 +113,6 @@ private: // 新增:用于维护音频包的timestamp队列 std::list timestamp_queue_; std::mutex timestamp_mutex_; - std::atomic last_output_timestamp_ = 0; std::unique_ptr opus_encoder_; std::unique_ptr opus_decoder_; @@ -131,7 +124,7 @@ private: void MainEventLoop(); void OnAudioInput(); void OnAudioOutput(); - void ReadAudio(std::vector& data, int sample_rate, int samples); + bool ReadAudio(std::vector& data, int sample_rate, int samples); void ResetDecoder(); void SetDecodeSampleRate(int sample_rate, int frame_duration); void CheckNewVersion(); diff --git a/main/assets/common/popup.p3 b/main/assets/common/popup.p3 new file mode 100644 index 00000000..6bc3060f Binary files /dev/null and b/main/assets/common/popup.p3 differ diff --git a/main/audio_processing/afe_audio_processor.cc b/main/audio_processing/afe_audio_processor.cc index 3415ff59..e456ae4f 100644 --- a/main/audio_processing/afe_audio_processor.cc +++ b/main/audio_processing/afe_audio_processor.cc @@ -3,7 +3,7 @@ #define PROCESSOR_RUNNING 0x01 -static const char* TAG = "AfeAudioProcessor"; +#define TAG "AfeAudioProcessor" AfeAudioProcessor::AfeAudioProcessor() : afe_data_(nullptr) { diff --git a/main/audio_processing/wake_word_detect.cc b/main/audio_processing/afe_wake_word.cc similarity index 85% rename from main/audio_processing/wake_word_detect.cc rename to main/audio_processing/afe_wake_word.cc index 0fce416a..af7bfa06 100644 --- a/main/audio_processing/wake_word_detect.cc +++ b/main/audio_processing/afe_wake_word.cc @@ -1,4 +1,4 @@ -#include "wake_word_detect.h" +#include 
"afe_wake_word.h" #include "application.h" #include @@ -8,9 +8,9 @@ #define DETECTION_RUNNING_EVENT 1 -static const char* TAG = "WakeWordDetect"; +#define TAG "AfeWakeWord" -WakeWordDetect::WakeWordDetect() +AfeWakeWord::AfeWakeWord() : afe_data_(nullptr), wake_word_pcm_(), wake_word_opus_() { @@ -18,7 +18,7 @@ WakeWordDetect::WakeWordDetect() event_group_ = xEventGroupCreate(); } -WakeWordDetect::~WakeWordDetect() { +AfeWakeWord::~AfeWakeWord() { if (afe_data_ != nullptr) { afe_iface_->destroy(afe_data_); } @@ -30,7 +30,7 @@ WakeWordDetect::~WakeWordDetect() { vEventGroupDelete(event_group_); } -void WakeWordDetect::Initialize(AudioCodec* codec) { +void AfeWakeWord::Initialize(AudioCodec* codec) { codec_ = codec; int ref_num = codec_->input_reference() ? 1 : 0; @@ -67,46 +67,46 @@ void WakeWordDetect::Initialize(AudioCodec* codec) { afe_data_ = afe_iface_->create_from_config(afe_config); xTaskCreate([](void* arg) { - auto this_ = (WakeWordDetect*)arg; + auto this_ = (AfeWakeWord*)arg; this_->AudioDetectionTask(); vTaskDelete(NULL); }, "audio_detection", 4096, this, 3, nullptr); } -void WakeWordDetect::OnWakeWordDetected(std::function callback) { +void AfeWakeWord::OnWakeWordDetected(std::function callback) { wake_word_detected_callback_ = callback; } -void WakeWordDetect::StartDetection() { +void AfeWakeWord::StartDetection() { xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT); } -void WakeWordDetect::StopDetection() { +void AfeWakeWord::StopDetection() { xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT); if (afe_data_ != nullptr) { afe_iface_->reset_buffer(afe_data_); } } -bool WakeWordDetect::IsDetectionRunning() { +bool AfeWakeWord::IsDetectionRunning() { return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT; } -void WakeWordDetect::Feed(const std::vector& data) { +void AfeWakeWord::Feed(const std::vector& data) { if (afe_data_ == nullptr) { return; } afe_iface_->feed(afe_data_, data.data()); } -size_t WakeWordDetect::GetFeedSize() 
{ +size_t AfeWakeWord::GetFeedSize() { if (afe_data_ == nullptr) { return 0; } return afe_iface_->get_feed_chunksize(afe_data_) * codec_->input_channels(); } -void WakeWordDetect::AudioDetectionTask() { +void AfeWakeWord::AudioDetectionTask() { auto fetch_size = afe_iface_->get_fetch_chunksize(afe_data_); auto feed_size = afe_iface_->get_feed_chunksize(afe_data_); ESP_LOGI(TAG, "Audio detection task started, feed size: %d fetch size: %d", @@ -121,7 +121,7 @@ void WakeWordDetect::AudioDetectionTask() { } // Store the wake word data for voice recognition, like who is speaking - StoreWakeWordData((uint16_t*)res->data, res->data_size / sizeof(uint16_t)); + StoreWakeWordData(res->data, res->data_size / sizeof(int16_t)); if (res->wakeup_state == WAKENET_DETECTED) { StopDetection(); @@ -134,7 +134,7 @@ void WakeWordDetect::AudioDetectionTask() { } } -void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) { +void AfeWakeWord::StoreWakeWordData(const int16_t* data, size_t samples) { // store audio data to wake_word_pcm_ wake_word_pcm_.emplace_back(std::vector(data, data + samples)); // keep about 2 seconds of data, detect duration is 30ms (sample_rate == 16000, chunksize == 512) @@ -143,13 +143,13 @@ void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) { } } -void WakeWordDetect::EncodeWakeWordData() { +void AfeWakeWord::EncodeWakeWordData() { wake_word_opus_.clear(); if (wake_word_encode_task_stack_ == nullptr) { wake_word_encode_task_stack_ = (StackType_t*)heap_caps_malloc(4096 * 8, MALLOC_CAP_SPIRAM); } wake_word_encode_task_ = xTaskCreateStatic([](void* arg) { - auto this_ = (WakeWordDetect*)arg; + auto this_ = (AfeWakeWord*)arg; { auto start_time = esp_timer_get_time(); auto encoder = std::make_unique(16000, 1, OPUS_FRAME_DURATION_MS); @@ -176,7 +176,7 @@ void WakeWordDetect::EncodeWakeWordData() { }, "encode_detect_packets", 4096 * 8, this, 2, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_); } -bool 
WakeWordDetect::GetWakeWordOpus(std::vector& opus) { +bool AfeWakeWord::GetWakeWordOpus(std::vector& opus) { std::unique_lock lock(wake_word_mutex_); wake_word_cv_.wait(lock, [this]() { return !wake_word_opus_.empty(); diff --git a/main/audio_processing/wake_word_detect.h b/main/audio_processing/afe_wake_word.h similarity index 87% rename from main/audio_processing/wake_word_detect.h rename to main/audio_processing/afe_wake_word.h index 583b50c9..795a20b7 100644 --- a/main/audio_processing/wake_word_detect.h +++ b/main/audio_processing/afe_wake_word.h @@ -1,5 +1,5 @@ -#ifndef WAKE_WORD_DETECT_H -#define WAKE_WORD_DETECT_H +#ifndef AFE_WAKE_WORD_H +#define AFE_WAKE_WORD_H #include #include @@ -16,11 +16,12 @@ #include #include "audio_codec.h" +#include "wake_word.h" -class WakeWordDetect { +class AfeWakeWord : public WakeWord { public: - WakeWordDetect(); - ~WakeWordDetect(); + AfeWakeWord(); + ~AfeWakeWord(); void Initialize(AudioCodec* codec); void Feed(const std::vector& data); @@ -51,7 +52,7 @@ private: std::mutex wake_word_mutex_; std::condition_variable wake_word_cv_; - void StoreWakeWordData(uint16_t* data, size_t size); + void StoreWakeWordData(const int16_t* data, size_t size); void AudioDetectionTask(); }; diff --git a/main/audio_processing/wake_word_no_afe.cc b/main/audio_processing/esp_wake_word.cc similarity index 55% rename from main/audio_processing/wake_word_no_afe.cc rename to main/audio_processing/esp_wake_word.cc index 4bffb020..a71b8f5a 100644 --- a/main/audio_processing/wake_word_no_afe.cc +++ b/main/audio_processing/esp_wake_word.cc @@ -1,4 +1,4 @@ -#include "wake_word_no_afe.h" +#include "esp_wake_word.h" #include "application.h" #include @@ -8,13 +8,13 @@ #define DETECTION_RUNNING_EVENT 1 -static const char* TAG = "WakeWordDetect"; +#define TAG "EspWakeWord" -WakeWordDetect::WakeWordDetect() { +EspWakeWord::EspWakeWord() { event_group_ = xEventGroupCreate(); } -WakeWordDetect::~WakeWordDetect() { +EspWakeWord::~EspWakeWord() { if 
(wakenet_data_ != nullptr) { wakenet_iface_->destroy(wakenet_data_); esp_srmodel_deinit(wakenet_model_); @@ -23,13 +23,21 @@ WakeWordDetect::~WakeWordDetect() { vEventGroupDelete(event_group_); } -void WakeWordDetect::Initialize(AudioCodec* codec) { +void EspWakeWord::Initialize(AudioCodec* codec) { codec_ = codec; wakenet_model_ = esp_srmodel_init("model"); + // NOTE(review): esp_srmodel_init() appears to return NULL when the model partition cannot be loaded - guard before dereferencing + if (wakenet_model_ == nullptr) { + ESP_LOGE(TAG, "Failed to initialize wake word model list"); + return; + } if(wakenet_model_->num > 1) { ESP_LOGW(TAG, "More than one model found, using the first one"); + } else if (wakenet_model_->num == 0) { + ESP_LOGE(TAG, "No model found"); + return; } char *model_name = wakenet_model_->model_name[0]; wakenet_iface_ = (esp_wn_iface_t*)esp_wn_handle_from_name(model_name); @@ -40,28 +48,46 @@ void WakeWordDetect::Initialize(AudioCodec* codec) { ESP_LOGI(TAG, "Wake word(%s),freq: %d, chunksize: %d", model_name, frequency, audio_chunksize); } -void WakeWordDetect::StartDetection() { +void EspWakeWord::OnWakeWordDetected(std::function callback) { + wake_word_detected_callback_ = callback; +} + +void EspWakeWord::StartDetection() { + ESP_LOGI(TAG, "Start wake word detection"); xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT); } -void WakeWordDetect::StopDetection() { +void EspWakeWord::StopDetection() { + ESP_LOGI(TAG, "Stop wake word detection"); xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT); } -bool WakeWordDetect::IsDetectionRunning() { +bool EspWakeWord::IsDetectionRunning() { return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT; } -void WakeWordDetect::Feed(const std::vector& data) { +void EspWakeWord::Feed(const std::vector& data) { int res = wakenet_iface_->detect(wakenet_data_, (int16_t *)data.data()); if (res > 0) { - ESP_LOGI(TAG, "Wake word detected"); - auto& app = Application::GetInstance(); - app.ToggleChatState(); + StopDetection(); + last_detected_wake_word_ = wakenet_iface_->get_word_name(wakenet_data_, res); + + if (wake_word_detected_callback_) { + wake_word_detected_callback_(last_detected_wake_word_); + } } } -size_t
WakeWordDetect::GetFeedSize() { - +size_t EspWakeWord::GetFeedSize() { + if (wakenet_data_ == nullptr) { + return 0; + } return wakenet_iface_->get_samp_chunksize(wakenet_data_) * codec_->input_channels(); } + +void EspWakeWord::EncodeWakeWordData() { +} + +bool EspWakeWord::GetWakeWordOpus(std::vector& opus) { + return false; +} diff --git a/main/audio_processing/wake_word_no_afe.h b/main/audio_processing/esp_wake_word.h similarity index 54% rename from main/audio_processing/wake_word_no_afe.h rename to main/audio_processing/esp_wake_word.h index 90e1110d..189243c8 100644 --- a/main/audio_processing/wake_word_no_afe.h +++ b/main/audio_processing/esp_wake_word.h @@ -1,13 +1,13 @@ -#ifndef WAKE_WORD_DETECT_H -#define WAKE_WORD_DETECT_H +#ifndef ESP_WAKE_WORD_H +#define ESP_WAKE_WORD_H #include #include #include -#include "model_path.h" -#include "esp_wn_iface.h" -#include "esp_wn_models.h" +#include +#include +#include #include #include @@ -17,19 +17,23 @@ #include #include "audio_codec.h" -#include +#include "wake_word.h" -class WakeWordDetect { +class EspWakeWord : public WakeWord { public: - WakeWordDetect(); - ~WakeWordDetect(); + EspWakeWord(); + ~EspWakeWord(); void Initialize(AudioCodec* codec); void Feed(const std::vector& data); + void OnWakeWordDetected(std::function callback); void StartDetection(); void StopDetection(); bool IsDetectionRunning(); size_t GetFeedSize(); + void EncodeWakeWordData(); + bool GetWakeWordOpus(std::vector& opus); + const std::string& GetLastDetectedWakeWord() const { return last_detected_wake_word_; } private: esp_wn_iface_t *wakenet_iface_ = nullptr; @@ -37,6 +41,9 @@ private: srmodel_list_t *wakenet_model_ = nullptr; EventGroupHandle_t event_group_; AudioCodec* codec_ = nullptr; + + std::function wake_word_detected_callback_; + std::string last_detected_wake_word_; }; #endif diff --git a/main/audio_processing/dummy_audio_processor.cc b/main/audio_processing/no_audio_processor.cc similarity index 51% rename from 
main/audio_processing/dummy_audio_processor.cc rename to main/audio_processing/no_audio_processor.cc index 7cb606d7..a84f8af1 100644 --- a/main/audio_processing/dummy_audio_processor.cc +++ b/main/audio_processing/no_audio_processor.cc @@ -1,13 +1,13 @@ -#include "dummy_audio_processor.h" +#include "no_audio_processor.h" #include -#define TAG "DummyAudioProcessor" +#define TAG "NoAudioProcessor" -void DummyAudioProcessor::Initialize(AudioCodec* codec) { +void NoAudioProcessor::Initialize(AudioCodec* codec) { codec_ = codec; } -void DummyAudioProcessor::Feed(const std::vector& data) { +void NoAudioProcessor::Feed(const std::vector& data) { if (!is_running_ || !output_callback_) { return; } @@ -15,27 +15,27 @@ void DummyAudioProcessor::Feed(const std::vector& data) { output_callback_(std::vector(data)); } -void DummyAudioProcessor::Start() { +void NoAudioProcessor::Start() { is_running_ = true; } -void DummyAudioProcessor::Stop() { +void NoAudioProcessor::Stop() { is_running_ = false; } -bool DummyAudioProcessor::IsRunning() { +bool NoAudioProcessor::IsRunning() { return is_running_; } -void DummyAudioProcessor::OnOutput(std::function&& data)> callback) { +void NoAudioProcessor::OnOutput(std::function&& data)> callback) { output_callback_ = callback; } -void DummyAudioProcessor::OnVadStateChange(std::function callback) { +void NoAudioProcessor::OnVadStateChange(std::function callback) { vad_state_change_callback_ = callback; } -size_t DummyAudioProcessor::GetFeedSize() { +size_t NoAudioProcessor::GetFeedSize() { if (!codec_) { return 0; } @@ -43,7 +43,7 @@ size_t DummyAudioProcessor::GetFeedSize() { return 30 * codec_->input_sample_rate() / 1000; } -void DummyAudioProcessor::EnableDeviceAec(bool enable) { +void NoAudioProcessor::EnableDeviceAec(bool enable) { if (enable) { ESP_LOGE(TAG, "Device AEC is not supported"); } diff --git a/main/audio_processing/dummy_audio_processor.h b/main/audio_processing/no_audio_processor.h similarity index 86% rename from 
main/audio_processing/dummy_audio_processor.h rename to main/audio_processing/no_audio_processor.h index 4383b7a9..ed54741a 100644 --- a/main/audio_processing/dummy_audio_processor.h +++ b/main/audio_processing/no_audio_processor.h @@ -7,10 +7,10 @@ #include "audio_processor.h" #include "audio_codec.h" -class DummyAudioProcessor : public AudioProcessor { +class NoAudioProcessor : public AudioProcessor { public: - DummyAudioProcessor() = default; - ~DummyAudioProcessor() = default; + NoAudioProcessor() = default; + ~NoAudioProcessor() = default; void Initialize(AudioCodec* codec) override; void Feed(const std::vector& data) override; diff --git a/main/audio_processing/no_wake_word.cc b/main/audio_processing/no_wake_word.cc new file mode 100644 index 00000000..34a85543 --- /dev/null +++ b/main/audio_processing/no_wake_word.cc @@ -0,0 +1,46 @@ +#include "no_wake_word.h" +#include + +#define TAG "NoWakeWord" + +void NoWakeWord::Initialize(AudioCodec* codec) { + codec_ = codec; +} + +void NoWakeWord::Feed(const std::vector& data) { + // Do nothing - no wake word processing +} + +void NoWakeWord::OnWakeWordDetected(std::function callback) { + // Do nothing - no wake word processing +} + +void NoWakeWord::StartDetection() { + // Do nothing - no wake word processing +} + +void NoWakeWord::StopDetection() { + // Do nothing - no wake word processing +} + +bool NoWakeWord::IsDetectionRunning() { + return false; // No wake word processing +} + +size_t NoWakeWord::GetFeedSize() { + return 0; // No specific feed size requirement +} + +void NoWakeWord::EncodeWakeWordData() { + // Do nothing - no encoding needed +} + +bool NoWakeWord::GetWakeWordOpus(std::vector& opus) { + opus.clear(); + return false; // No opus data available +} + +const std::string& NoWakeWord::GetLastDetectedWakeWord() const { + static const std::string kNoWakeWord; // Static storage: returning "" would bind the reference to a destroyed temporary + return kNoWakeWord; // No wake word detected +} \ No newline at end of file diff --git a/main/audio_processing/no_wake_word.h b/main/audio_processing/no_wake_word.h new file mode 100644 index
00000000..c367595b --- /dev/null +++ b/main/audio_processing/no_wake_word.h @@ -0,0 +1,31 @@ +#ifndef NO_WAKE_WORD_H +#define NO_WAKE_WORD_H + +#include +#include +#include + +#include "wake_word.h" +#include "audio_codec.h" + +class NoWakeWord : public WakeWord { +public: + NoWakeWord() = default; + ~NoWakeWord() = default; + + void Initialize(AudioCodec* codec) override; + void Feed(const std::vector& data) override; + void OnWakeWordDetected(std::function callback) override; + void StartDetection() override; + void StopDetection() override; + bool IsDetectionRunning() override; + size_t GetFeedSize() override; + void EncodeWakeWordData() override; + bool GetWakeWordOpus(std::vector& opus) override; + const std::string& GetLastDetectedWakeWord() const override; + +private: + AudioCodec* codec_ = nullptr; +}; + +#endif \ No newline at end of file diff --git a/main/audio_processing/wake_word.h b/main/audio_processing/wake_word.h new file mode 100644 index 00000000..395f96cd --- /dev/null +++ b/main/audio_processing/wake_word.h @@ -0,0 +1,26 @@ +#ifndef WAKE_WORD_H +#define WAKE_WORD_H + +#include +#include +#include + +#include "audio_codec.h" + +class WakeWord { +public: + virtual ~WakeWord() = default; + + virtual void Initialize(AudioCodec* codec) = 0; + virtual void Feed(const std::vector& data) = 0; + virtual void OnWakeWordDetected(std::function callback) = 0; + virtual void StartDetection() = 0; + virtual void StopDetection() = 0; + virtual bool IsDetectionRunning() = 0; + virtual size_t GetFeedSize() = 0; + virtual void EncodeWakeWordData() = 0; + virtual bool GetWakeWordOpus(std::vector& opus) = 0; + virtual const std::string& GetLastDetectedWakeWord() const = 0; +}; + +#endif diff --git a/main/boards/esp-hi/config.json b/main/boards/esp-hi/config.json index bd31a9c3..f432c446 100644 --- a/main/boards/esp-hi/config.json +++ b/main/boards/esp-hi/config.json @@ -30,7 +30,8 @@ "CONFIG_MBEDTLS_DYNAMIC_FREE_CONFIG_DATA=y", "CONFIG_NEWLIB_NANO_FORMAT=y", 
"CONFIG_MMAP_FILE_NAME_LENGTH=25", - "CONFIG_ESP_CONSOLE_NONE=y" + "CONFIG_ESP_CONSOLE_NONE=y", + "CONFIG_IOT_PROTOCOL_XIAOZHI=y" ] } ] diff --git a/main/boards/genjutech-s3-1.54tft/genjutech-s3-1.54tft.cc b/main/boards/genjutech-s3-1.54tft/genjutech-s3-1.54tft.cc index 629a34c1..85b24500 100644 --- a/main/boards/genjutech-s3-1.54tft/genjutech-s3-1.54tft.cc +++ b/main/boards/genjutech-s3-1.54tft/genjutech-s3-1.54tft.cc @@ -70,7 +70,7 @@ private: } void InitializePowerSaveTimer() { - power_save_timer_ = new PowerSaveTimer(160, 60); + power_save_timer_ = new PowerSaveTimer(240, 60); power_save_timer_->OnEnterSleepMode([this]() { ESP_LOGI(TAG, "Enabling sleep mode"); auto display = GetDisplay(); diff --git a/main/boards/lichuang-c3-dev/config.json b/main/boards/lichuang-c3-dev/config.json index cdc508f1..d27ae460 100644 --- a/main/boards/lichuang-c3-dev/config.json +++ b/main/boards/lichuang-c3-dev/config.json @@ -5,7 +5,9 @@ "name": "lichuang-c3-dev", "sdkconfig_append": [ "CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y", - "CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=\"partitions_8M.csv\"" + "CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=\"partitions_8M.csv\"", + "CONFIG_ESP_WIFI_ENTERPRISE_SUPPORT=n", + "CONFIG_LWIP_IPV6=n" ] } ] diff --git a/main/boards/magiclick-c3-v2/config.json b/main/boards/magiclick-c3-v2/config.json index f3eeb8f6..4503ebdb 100644 --- a/main/boards/magiclick-c3-v2/config.json +++ b/main/boards/magiclick-c3-v2/config.json @@ -5,7 +5,8 @@ "name": "magiclick-c3-v2", "sdkconfig_append": [ "CONFIG_PM_ENABLE=y", - "CONFIG_FREERTOS_USE_TICKLESS_IDLE=y" + "CONFIG_FREERTOS_USE_TICKLESS_IDLE=y", + "CONFIG_USE_ESP_WAKE_WORD=n" ] } ] diff --git a/main/boards/magiclick-c3/config.json b/main/boards/magiclick-c3/config.json index 09eb3fdf..34d1471d 100644 --- a/main/boards/magiclick-c3/config.json +++ b/main/boards/magiclick-c3/config.json @@ -5,7 +5,8 @@ "name": "magiclick-c3", "sdkconfig_append": [ "CONFIG_PM_ENABLE=y", - "CONFIG_FREERTOS_USE_TICKLESS_IDLE=y" + 
"CONFIG_FREERTOS_USE_TICKLESS_IDLE=y", + "CONFIG_USE_ESP_WAKE_WORD=n" ] } ] diff --git a/main/boards/xmini-c3/config.json b/main/boards/xmini-c3/config.json index d6d2796c..d497d740 100644 --- a/main/boards/xmini-c3/config.json +++ b/main/boards/xmini-c3/config.json @@ -5,7 +5,8 @@ "name": "xmini-c3", "sdkconfig_append": [ "CONFIG_PM_ENABLE=y", - "CONFIG_FREERTOS_USE_TICKLESS_IDLE=y" + "CONFIG_FREERTOS_USE_TICKLESS_IDLE=y", + "CONFIG_USE_ESP_WAKE_WORD=y" ] } ] diff --git a/main/boards/xmini-c3/xmini_c3_board.cc b/main/boards/xmini-c3/xmini_c3_board.cc index 2c3011a2..847b8754 100644 --- a/main/boards/xmini-c3/xmini_c3_board.cc +++ b/main/boards/xmini-c3/xmini_c3_board.cc @@ -30,10 +30,10 @@ private: Display* display_ = nullptr; Button boot_button_; bool press_to_talk_enabled_ = false; - PowerSaveTimer* power_save_timer_; + PowerSaveTimer* power_save_timer_ = nullptr; void InitializePowerSaveTimer() { - power_save_timer_ = new PowerSaveTimer(160, 60); + power_save_timer_ = new PowerSaveTimer(160, 600); power_save_timer_->OnEnterSleepMode([this]() { ESP_LOGI(TAG, "Enabling sleep mode"); auto display = GetDisplay(); @@ -130,7 +130,9 @@ private: } }); boot_button_.OnPressDown([this]() { - power_save_timer_->WakeUp(); + if (power_save_timer_) { + power_save_timer_->WakeUp(); + } if (press_to_talk_enabled_) { Application::GetInstance().StartListening(); } diff --git a/main/protocols/mqtt_protocol.cc b/main/protocols/mqtt_protocol.cc index bf96b12d..6cf59bbb 100644 --- a/main/protocols/mqtt_protocol.cc +++ b/main/protocols/mqtt_protocol.cc @@ -227,6 +227,8 @@ bool MqttProtocol::OpenAudioChannel() { auto nonce = (uint8_t*)data.data(); auto encrypted = (uint8_t*)data.data() + aes_nonce_.size(); AudioStreamPacket packet; + packet.sample_rate = server_sample_rate_; + packet.frame_duration = server_frame_duration_; packet.timestamp = timestamp; packet.payload.resize(decrypted_size); int ret = mbedtls_aes_crypt_ctr(&aes_ctx_, decrypted_size, &nc_off, nonce, stream_block, 
encrypted, (uint8_t*)packet.payload.data()); diff --git a/main/protocols/protocol.h b/main/protocols/protocol.h index e23561ee..31f1ac43 100644 --- a/main/protocols/protocol.h +++ b/main/protocols/protocol.h @@ -8,6 +8,8 @@ #include struct AudioStreamPacket { + int sample_rate = 0; + int frame_duration = 0; uint32_t timestamp = 0; std::vector payload; }; diff --git a/main/protocols/websocket_protocol.cc b/main/protocols/websocket_protocol.cc index 45c00957..2d5c7840 100644 --- a/main/protocols/websocket_protocol.cc +++ b/main/protocols/websocket_protocol.cc @@ -124,6 +124,8 @@ bool WebsocketProtocol::OpenAudioChannel() { bp2->payload_size = ntohl(bp2->payload_size); auto payload = (uint8_t*)bp2->payload; on_incoming_audio_(AudioStreamPacket{ + .sample_rate = server_sample_rate_, + .frame_duration = server_frame_duration_, .timestamp = bp2->timestamp, .payload = std::vector(payload, payload + bp2->payload_size) }); @@ -133,11 +135,15 @@ bool WebsocketProtocol::OpenAudioChannel() { bp3->payload_size = ntohs(bp3->payload_size); auto payload = (uint8_t*)bp3->payload; on_incoming_audio_(AudioStreamPacket{ + .sample_rate = server_sample_rate_, + .frame_duration = server_frame_duration_, .timestamp = 0, .payload = std::vector(payload, payload + bp3->payload_size) }); } else { on_incoming_audio_(AudioStreamPacket{ + .sample_rate = server_sample_rate_, + .frame_duration = server_frame_duration_, .timestamp = 0, .payload = std::vector((uint8_t*)data, (uint8_t*)data + len) }); diff --git a/sdkconfig.defaults.esp32c3 b/sdkconfig.defaults.esp32c3 index e3ac6c78..725e561b 100644 --- a/sdkconfig.defaults.esp32c3 +++ b/sdkconfig.defaults.esp32c3 @@ -1,2 +1,3 @@ CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y +CONFIG_SR_WN_WN9S_NIHAOXIAOZHI=y