diff --git a/.gitignore b/.gitignore
index ec2ead75..bdb6e847 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,5 +10,6 @@ dependencies.lock
 .env
 releases/
 main/assets/lang_config.h
+main/mmap_generate_emoji.h
 .DS_Store
 .cache
\ No newline at end of file
diff --git a/main/CMakeLists.txt b/main/CMakeLists.txt
index 773bd1ab..414b63b7 100644
--- a/main/CMakeLists.txt
+++ b/main/CMakeLists.txt
@@ -194,13 +194,14 @@ list(APPEND SOURCES ${BOARD_SOURCES})
 if(CONFIG_USE_AUDIO_PROCESSOR)
     list(APPEND SOURCES "audio_processing/afe_audio_processor.cc")
 else()
-    list(APPEND SOURCES "audio_processing/dummy_audio_processor.cc")
+    list(APPEND SOURCES "audio_processing/no_audio_processor.cc")
 endif()
-if(CONFIG_USE_WAKE_WORD_DETECT)
-    list(APPEND SOURCES "audio_processing/wake_word_detect.cc")
-endif()
-if(CONFIG_USE_WAKE_WORD_DETECT_NO_AFE)
-    list(APPEND SOURCES "audio_processing/wake_word_no_afe.cc")
+if(CONFIG_USE_AFE_WAKE_WORD)
+    list(APPEND SOURCES "audio_processing/afe_wake_word.cc")
+elseif(CONFIG_USE_ESP_WAKE_WORD)
+    list(APPEND SOURCES "audio_processing/esp_wake_word.cc")
+else()
+    list(APPEND SOURCES "audio_processing/no_wake_word.cc")
 endif()
 
 # 根据Kconfig选择语言目录
diff --git a/main/Kconfig.projbuild b/main/Kconfig.projbuild
index ac973562..2cd46a73 100644
--- a/main/Kconfig.projbuild
+++ b/main/Kconfig.projbuild
@@ -30,152 +30,226 @@ choice BOARD_TYPE
         Board type. 开发板类型
     config BOARD_TYPE_BREAD_COMPACT_WIFI
         bool "面包板新版接线（WiFi）"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_BREAD_COMPACT_WIFI_LCD
         bool "面包板新版接线（WiFi）+ LCD"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_BREAD_COMPACT_ML307
         bool "面包板新版接线（ML307 AT）"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_BREAD_COMPACT_ESP32
         bool "面包板（WiFi） ESP32 DevKit"
+        depends on IDF_TARGET_ESP32
     config BOARD_TYPE_BREAD_COMPACT_ESP32_LCD
         bool "面包板（WiFi+ LCD） ESP32 DevKit"
+        depends on IDF_TARGET_ESP32
     config BOARD_TYPE_XMINI_C3
         bool "虾哥 Mini C3"
+        depends on IDF_TARGET_ESP32C3
     config BOARD_TYPE_ESP32S3_KORVO2_V3
         bool "ESP32S3_KORVO2_V3开发板"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP_SPARKBOT
         bool "ESP-SparkBot开发板"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP_SPOT_S3
         bool "ESP-Spot-S3"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP_HI
         bool "ESP-HI"
+        depends on IDF_TARGET_ESP32C3
     config BOARD_TYPE_ESP_BOX_3
         bool "ESP BOX 3"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP_BOX
         bool "ESP BOX"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP_BOX_LITE
         bool "ESP BOX Lite"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_KEVIN_BOX_1
         bool "Kevin Box 1"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_KEVIN_BOX_2
         bool "Kevin Box 2"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_KEVIN_C3
         bool "Kevin C3"
+        depends on IDF_TARGET_ESP32C3
     config BOARD_TYPE_KEVIN_SP_V3_DEV
         bool "Kevin SP V3开发板"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_KEVIN_SP_V4_DEV
         bool "Kevin SP V4开发板"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP32_CGC
         bool "ESP32 CGC"
+        depends on IDF_TARGET_ESP32
     config BOARD_TYPE_KEVIN_YUYING_313LCD
         bool "鱼鹰科技3.13LCD开发板"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_LICHUANG_DEV
         bool "立创·实战派ESP32-S3开发板"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_LICHUANG_C3_DEV
         bool "立创·实战派ESP32-C3开发板"
+        depends on IDF_TARGET_ESP32C3
     config BOARD_TYPE_DF_K10
         bool "DFRobot 行空板 k10"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_DF_S3_AI_CAM
         bool "DFRobot ESP32-S3 AI智能摄像头模块"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_MAGICLICK_2P4
         bool "神奇按钮 Magiclick_2.4"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_MAGICLICK_2P5
         bool "神奇按钮 Magiclick_2.5"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_MAGICLICK_C3
         bool "神奇按钮 Magiclick_C3"
+        depends on IDF_TARGET_ESP32C3
     config BOARD_TYPE_MAGICLICK_C3_V2
         bool "神奇按钮 Magiclick_C3_v2"
+        depends on IDF_TARGET_ESP32C3
     config BOARD_TYPE_M5STACK_CORE_S3
         bool "M5Stack CoreS3"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_M5STACK_CORE_TAB5
         bool "M5Stack Tab5"
+        depends on IDF_TARGET_ESP32P4
     config BOARD_TYPE_ATOMS3_ECHO_BASE
         bool "AtomS3 + Echo Base"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ATOMS3R_ECHO_BASE
         bool "AtomS3R + Echo Base"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ATOMS3R_CAM_M12_ECHO_BASE
         bool "AtomS3R CAM/M12 + Echo Base"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ATOMMATRIX_ECHO_BASE
         bool "AtomMatrix + Echo Base"
+        depends on IDF_TARGET_ESP32
     config BOARD_TYPE_ESP32S3_Touch_AMOLED_1_8
         bool "Waveshare ESP32-S3-Touch-AMOLED-1.8"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP32S3_Touch_AMOLED_1_75
         bool "Waveshare ESP32-S3-Touch-AMOLED-1.75"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP32S3_Touch_LCD_1_85C
         bool "Waveshare ESP32-S3-Touch-LCD-1.85C"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP32S3_Touch_LCD_1_85
         bool "Waveshare ESP32-S3-Touch-LCD-1.85"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP32S3_Touch_LCD_1_46
         bool "Waveshare ESP32-S3-Touch-LCD-1.46"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP32S3_Touch_LCD_3_5
         bool "Waveshare ESP32-S3-Touch-LCD-3.5"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP32P4_NANO
         bool "Waveshare ESP32-P4-NANO"
+        depends on IDF_TARGET_ESP32P4
     config BOARD_TYPE_ESP32P4_WIFI6_Touch_LCD_4B
         bool "Waveshare ESP32-P4-WIFI6-Touch-LCD-4B"
+        depends on IDF_TARGET_ESP32P4
     config BOARD_TYPE_ESP32P4_WIFI6_Touch_LCD_XC
         bool "Waveshare ESP32-P4-WIFI6-Touch-LCD-3.4C or ESP32-P4-WIFI6-Touch-LCD-4C"
+        depends on IDF_TARGET_ESP32P4
     config BOARD_TYPE_TUDOUZI
         bool "土豆子"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_LILYGO_T_CIRCLE_S3
         bool "LILYGO T-Circle-S3"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_LILYGO_T_CAMERAPLUS_S3_V1_0_V1_1
         bool "LILYGO T-CameraPlus-S3_V1_0_V1_1"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_LILYGO_T_CAMERAPLUS_S3_V1_2
         bool "LILYGO T-CameraPlus-S3_V1_2"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_LILYGO_T_DISPLAY_S3_PRO_MVSRLORA
         bool "LILYGO T-Display-S3-Pro-MVSRLora"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_LILYGO_T_DISPLAY_S3_PRO_MVSRLORA_NO_BATTERY
         bool "LILYGO T-Display-S3-Pro-MVSRLora_No_Battery"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_MOVECALL_MOJI_ESP32S3
         bool "Movecall Moji 小智AI衍生版"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_MOVECALL_CUICAN_ESP32S3
         bool "Movecall CuiCan 璀璨·AI吊坠"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ATK_DNESP32S3
         bool "正点原子DNESP32S3开发板"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ATK_DNESP32S3_BOX
         bool "正点原子DNESP32S3-BOX"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ATK_DNESP32S3_BOX0
         bool "正点原子DNESP32S3-BOX0"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ATK_DNESP32S3M_WIFI
         bool "正点原子DNESP32S3M-WIFI"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ATK_DNESP32S3M_4G
         bool "正点原子DNESP32S3M-4G"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_DU_CHATX
         bool "嘟嘟开发板CHATX(wifi)"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP32S3_Taiji_Pi
         bool "太极小派esp32s3"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_XINGZHI_Cube_0_85TFT_WIFI
         bool "无名科技星智0.85(WIFI)"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_XINGZHI_Cube_0_85TFT_ML307
         bool "无名科技星智0.85(ML307)"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_XINGZHI_Cube_0_96OLED_WIFI
         bool "无名科技星智0.96(WIFI)"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_XINGZHI_Cube_0_96OLED_ML307
         bool "无名科技星智0.96(ML307)"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_XINGZHI_Cube_1_54TFT_WIFI
         bool "无名科技星智1.54(WIFI)"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_XINGZHI_Cube_1_54TFT_ML307
         bool "无名科技星智1.54(ML307)"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_SENSECAP_WATCHER
         bool "SenseCAP Watcher"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_DOIT_S3_AIBOX
         bool "四博智联AI陪伴盒子"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_MIXGO_NOVA
         bool "元控·青春"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_GENJUTECH_S3_1_54TFT
         bool "亘具科技1.54(s3)"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP_S3_LCD_EV_Board
         bool "乐鑫ESP S3 LCD EV Board开发板"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ZHENGCHEN_1_54TFT_WIFI
         bool "征辰科技1.54(WIFI)"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ZHENGCHEN_1_54TFT_ML307
         bool "征辰科技1.54(ML307)"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_MINSI_K08_DUAL
         bool "敏思科技K08(DUAL)"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP32_S3_1_54_MUMA
         bool "Spotpear ESP32-S3-1.54-MUMA"
+        depends on IDF_TARGET_ESP32S3
     config BOARD_TYPE_ESP32_S3_1_28_BOX
         bool "Spotpear ESP32-S3-1.28-BOX"
+        depends on IDF_TARGET_ESP32S3
 endchoice
 
 choice ESP_S3_LCD_EV_Board_Version_TYPE
@@ -270,24 +344,26 @@ config USE_WECHAT_MESSAGE_STYLE
     help
         使用微信聊天界面风格
 
-config USE_WAKE_WORD_DETECT_NO_AFE
+config USE_ESP_WAKE_WORD
     bool "Enable Wake Word Detection (without AFE)"
     default y
     depends on IDF_TARGET_ESP32C3 || IDF_TARGET_ESP32C5
-
-config USE_WAKE_WORD_DETECT
-    bool "Enable Wake Word Detection"
-    default y
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4 && SPIRAM
     help
-        需要 ESP32 S3 与 AFE 支持
+        支持 ESP32 C3 与 ESP32 C5
+
+config USE_AFE_WAKE_WORD
+    bool "Enable Wake Word Detection (AFE)"
+    default n
+    depends on (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM
+    help
+        需要 ESP32 S3 或 ESP32 P4，并且需要 PSRAM 支持
 
 config USE_AUDIO_PROCESSOR
     bool "Enable Audio Noise Reduction"
     default y
-    depends on IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4 && SPIRAM
+    depends on (IDF_TARGET_ESP32S3 || IDF_TARGET_ESP32P4) && SPIRAM
     help
-        需要 ESP32 S3 与 AFE 支持
+        需要 ESP32 S3 或 ESP32 P4，并且需要 PSRAM 支持
 
 config USE_DEVICE_AEC
     bool "Enable Device-Side AEC"
@@ -297,7 +373,7 @@ config USE_DEVICE_AEC
         因为性能不够，不建议和微信聊天界面风格同时开启
 
 config USE_SERVER_AEC
-    bool "Enable Server-Side AEC"
+    bool "Enable Server-Side AEC (Unstable)"
     default n
     depends on USE_AUDIO_PROCESSOR
     help
diff --git a/main/application.cc b/main/application.cc
index 264185e3..c52b155d 100644
--- a/main/application.cc
+++ b/main/application.cc
@@ -14,7 +14,15 @@
 #if CONFIG_USE_AUDIO_PROCESSOR
 #include "afe_audio_processor.h"
 #else
-#include "dummy_audio_processor.h"
+#include "no_audio_processor.h"
+#endif
+
+#if CONFIG_USE_AFE_WAKE_WORD
+#include "afe_wake_word.h"
+#elif CONFIG_USE_ESP_WAKE_WORD
+#include "esp_wake_word.h"
+#else
+#include "no_wake_word.h"
 #endif
 
 #include <cstring>
@@ -55,7 +63,15 @@ Application::Application() {
 #if CONFIG_USE_AUDIO_PROCESSOR
     audio_processor_ = std::make_unique<AfeAudioProcessor>();
 #else
-    audio_processor_ = std::make_unique<DummyAudioProcessor>();
+    audio_processor_ = std::make_unique<NoAudioProcessor>();
+#endif
+
+#if CONFIG_USE_AFE_WAKE_WORD
+    wake_word_ = std::make_unique<AfeWakeWord>();
+#elif CONFIG_USE_ESP_WAKE_WORD
+    wake_word_ = std::make_unique<EspWakeWord>();
+#else
+    wake_word_ = std::make_unique<NoWakeWord>();
 #endif
 
     esp_timer_create_args_t clock_timer_args = {
@@ -129,9 +145,7 @@ void Application::CheckNewVersion() {
 
             auto& board = Board::GetInstance();
             board.SetPowerSaveMode(false);
-#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
-            wake_word_detect_.StopDetection();
-#endif
+            wake_word_->StopDetection();
             // 预先关闭音频输出，避免升级过程有音频操作
             auto codec = board.GetAudioCodec();
             codec->EnableInput(false);
@@ -256,8 +270,6 @@ void Application::PlaySound(const std::string_view& sound) {
     }
     background_task_->WaitForCompletion();
 
-    // The assets are encoded at 16000Hz, 60ms frame duration
-    SetDecodeSampleRate(16000, 60);
     const char* data = sound.data();
     size_t size = sound.size();
     for (const char* p = data; p < data + size; ) {
@@ -266,6 +278,8 @@ void Application::PlaySound(const std::string_view& sound) {
 
         auto payload_size = ntohs(p3->payload_size);
         AudioStreamPacket packet;
+        packet.sample_rate = 16000;
+        packet.frame_duration = 60;
         packet.payload.resize(payload_size);
         memcpy(packet.payload.data(), p3->payload, payload_size);
         p += payload_size;
@@ -432,7 +446,7 @@ void Application::Start() {
     });
     protocol_->OnIncomingAudio([this](AudioStreamPacket&& packet) {
         std::lock_guard<std::mutex> lock(mutex_);
-        if (audio_decode_queue_.size() < MAX_AUDIO_PACKETS_IN_QUEUE) {
+        if (device_state_ == kDeviceStateSpeaking && audio_decode_queue_.size() < MAX_AUDIO_PACKETS_IN_QUEUE) {
             audio_decode_queue_.emplace_back(std::move(packet));
         }
     });
@@ -442,7 +456,6 @@ void Application::Start() {
             ESP_LOGW(TAG, "Server sample rate %d does not match device output sample rate %d, resampling may cause distortion",
                 protocol_->server_sample_rate(), codec->output_sample_rate());
         }
-        SetDecodeSampleRate(protocol_->server_sample_rate(), protocol_->server_frame_duration());
 
 #if CONFIG_IOT_PROTOCOL_XIAOZHI
         auto& thing_manager = iot::ThingManager::GetInstance();
@@ -600,28 +613,40 @@ void Application::Start() {
         }
     });
 
-#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
-    wake_word_detect_.Initialize(codec);
-#ifdef CONFIG_USE_WAKE_WORD_DETECT
-    wake_word_detect_.OnWakeWordDetected([this](const std::string& wake_word) {
+    wake_word_->Initialize(codec);
+    wake_word_->OnWakeWordDetected([this](const std::string& wake_word) {
         Schedule([this, &wake_word]() {
-            if (device_state_ == kDeviceStateIdle) {
-                SetDeviceState(kDeviceStateConnecting);
-                wake_word_detect_.EncodeWakeWordData();
+            if (!protocol_) {
+                return;
+            }
 
-                if (!protocol_ || !protocol_->OpenAudioChannel()) {
-                    wake_word_detect_.StartDetection();
-                    return;
+            if (device_state_ == kDeviceStateIdle) {
+                wake_word_->EncodeWakeWordData();
+
+                if (!protocol_->IsAudioChannelOpened()) {
+                    SetDeviceState(kDeviceStateConnecting);
+                    if (!protocol_->OpenAudioChannel()) {
+                        wake_word_->StartDetection();
+                        return;
+                    }
                 }
-                
+
+                ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
+#if CONFIG_USE_AFE_WAKE_WORD
                 AudioStreamPacket packet;
                 // Encode and send the wake word data to the server
-                while (wake_word_detect_.GetWakeWordOpus(packet.payload)) {
+                while (wake_word_->GetWakeWordOpus(packet.payload)) {
                     protocol_->SendAudio(packet);
                 }
                 // Set the chat state to wake word detected
                 protocol_->SendWakeWordDetected(wake_word);
-                ESP_LOGI(TAG, "Wake word detected: %s", wake_word.c_str());
+#else
+                // Play the pop up sound to indicate the wake word is detected
+                // And wait 60ms to make sure the queue has been processed by audio task
+                ResetDecoder();
+                PlaySound(Lang::Sounds::P3_POPUP);
+                vTaskDelay(pdMS_TO_TICKS(60));
+#endif
                 SetListeningMode(aec_mode_ == kAecOff ? kListeningModeAutoStop : kListeningModeRealtime);
             } else if (device_state_ == kDeviceStateSpeaking) {
                 AbortSpeaking(kAbortReasonWakeWordDetected);
@@ -630,9 +655,7 @@ void Application::Start() {
             }
         });
     });
-#endif
-    wake_word_detect_.StartDetection();
-#endif
+    wake_word_->StartDetection();
 
     // Wait for the new version check to finish
     xEventGroupWaitBits(event_group_, CHECK_NEW_VERSION_DONE_EVENT, pdTRUE, pdFALSE, portMAX_DELAY);
@@ -751,17 +774,14 @@ void Application::OnAudioOutput() {
         return;
     }
 
-    if (device_state_ == kDeviceStateListening) {
-        audio_decode_queue_.clear();
-        audio_decode_cv_.notify_all();
-        return;
-    }
-
     auto packet = std::move(audio_decode_queue_.front());
     audio_decode_queue_.pop_front();
     lock.unlock();
     audio_decode_cv_.notify_all();
 
+    // Synchronize the sample rate and frame duration
+    SetDecodeSampleRate(packet.sample_rate, packet.frame_duration);
+
     busy_decoding_audio_ = true;
     background_task_->Schedule([this, codec, packet = std::move(packet)]() mutable {
         busy_decoding_audio_ = false;
@@ -782,45 +802,48 @@ void Application::OnAudioOutput() {
         }
         codec->OutputData(pcm);
 #ifdef CONFIG_USE_SERVER_AEC
-            std::lock_guard<std::mutex> lock(timestamp_mutex_);
-            timestamp_queue_.push_back(packet.timestamp);
-            last_output_timestamp_ = packet.timestamp;
+        std::lock_guard<std::mutex> lock(timestamp_mutex_);
+        timestamp_queue_.push_back(packet.timestamp);
 #endif
         last_output_time_ = std::chrono::steady_clock::now();
     });
 }
 
 void Application::OnAudioInput() {
-#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
-    if (wake_word_detect_.IsDetectionRunning()) {
+    if (wake_word_->IsDetectionRunning()) {
         std::vector<int16_t> data;
-        int samples = wake_word_detect_.GetFeedSize();
+        int samples = wake_word_->GetFeedSize();
         if (samples > 0) {
-            ReadAudio(data, 16000, samples);
-            wake_word_detect_.Feed(data);
-            return;
+            if (ReadAudio(data, 16000, samples)) {
+                wake_word_->Feed(data);
+                return;
+            }
         }
     }
-#endif
     if (audio_processor_->IsRunning()) {
         std::vector<int16_t> data;
         int samples = audio_processor_->GetFeedSize();
         if (samples > 0) {
-            ReadAudio(data, 16000, samples);
-            audio_processor_->Feed(data);
-            return;
+            if (ReadAudio(data, 16000, samples)) {
+                audio_processor_->Feed(data);
+                return;
+            }
         }
     }
 
     vTaskDelay(pdMS_TO_TICKS(OPUS_FRAME_DURATION_MS / 2));
 }
 
-void Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples) {
+bool Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples) {
     auto codec = Board::GetInstance().GetAudioCodec();
+    if (!codec->input_enabled()) {
+        return false;
+    }
+
     if (codec->input_sample_rate() != sample_rate) {
         data.resize(samples * codec->input_sample_rate() / sample_rate);
         if (!codec->InputData(data)) {
-            return;
+            return false;
         }
         if (codec->input_channels() == 2) {
             auto mic_channel = std::vector<int16_t>(data.size() / 2);
@@ -846,9 +869,10 @@ void Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int sam
     } else {
         data.resize(samples);
         if (!codec->InputData(data)) {
-            return;
+            return false;
         }
     }
+    return true;
 }
 
 void Application::AbortSpeaking(AbortReason reason) {
@@ -884,17 +908,13 @@ void Application::SetDeviceState(DeviceState state) {
             display->SetStatus(Lang::Strings::STANDBY);
             display->SetEmotion("neutral");
             audio_processor_->Stop();
-            
-#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
-            wake_word_detect_.StartDetection();
-#endif
+            wake_word_->StartDetection();
             break;
         case kDeviceStateConnecting:
             display->SetStatus(Lang::Strings::CONNECTING);
             display->SetEmotion("neutral");
             display->SetChatMessage("system", "");
             timestamp_queue_.clear();
-            last_output_timestamp_ = 0;
             break;
         case kDeviceStateListening:
             display->SetStatus(Lang::Strings::LISTENING);
@@ -909,14 +929,14 @@ void Application::SetDeviceState(DeviceState state) {
                 // Send the start listening command
                 protocol_->SendStartListening(listening_mode_);
                 if (previous_state == kDeviceStateSpeaking) {
+                    { std::lock_guard<std::mutex> lock(mutex_); audio_decode_queue_.clear(); }  // incoming-audio callback pushes under mutex_
+                    audio_decode_cv_.notify_all();
                     // FIXME: Wait for the speaker to empty the buffer
                     vTaskDelay(pdMS_TO_TICKS(120));
                 }
                 opus_encoder_->ResetState();
-#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
-                wake_word_detect_.StopDetection();
-#endif
                 audio_processor_->Start();
+                wake_word_->StopDetection();
             }
             break;
         case kDeviceStateSpeaking:
@@ -924,8 +944,11 @@ void Application::SetDeviceState(DeviceState state) {
 
             if (listening_mode_ != kListeningModeRealtime) {
                 audio_processor_->Stop();
-#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
-                wake_word_detect_.StartDetection();
+                // Only AFE wake word can be detected in speaking mode
+#if CONFIG_USE_AFE_WAKE_WORD
+                wake_word_->StartDetection();
+#else
+                wake_word_->StopDetection();
 #endif
             }
             ResetDecoder();
diff --git a/main/application.h b/main/application.h
index 0e327a08..fd138447 100644
--- a/main/application.h
+++ b/main/application.h
@@ -21,12 +21,7 @@
 #include "ota.h"
 #include "background_task.h"
 #include "audio_processor.h"
-
-#if CONFIG_USE_WAKE_WORD_DETECT
-#include "wake_word_detect.h"
-#elif CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
-#include "wake_word_no_afe.h"
-#endif
+#include "wake_word.h"
 
 #define SCHEDULE_EVENT (1 << 0)
 #define SEND_AUDIO_EVENT (1 << 1)
@@ -83,14 +78,13 @@ public:
     void SendMcpMessage(const std::string& payload);
     void SetAecMode(AecMode mode);
     AecMode GetAecMode() const { return aec_mode_; }
+    BackgroundTask* GetBackgroundTask() const { return background_task_; }
 
 private:
     Application();
     ~Application();
 
-#if CONFIG_USE_WAKE_WORD_DETECT || CONFIG_USE_WAKE_WORD_DETECT_NO_AFE
-    WakeWordDetect wake_word_detect_;
-#endif
+    std::unique_ptr<WakeWord> wake_word_;
     std::unique_ptr<AudioProcessor> audio_processor_;
     Ota ota_;
     std::mutex mutex_;
@@ -119,7 +113,6 @@ private:
     // 新增：用于维护音频包的timestamp队列
     std::list<uint32_t> timestamp_queue_;
     std::mutex timestamp_mutex_;
-    std::atomic<uint32_t> last_output_timestamp_ = 0;
 
     std::unique_ptr<OpusEncoderWrapper> opus_encoder_;
     std::unique_ptr<OpusDecoderWrapper> opus_decoder_;
@@ -131,7 +124,7 @@ private:
     void MainEventLoop();
     void OnAudioInput();
     void OnAudioOutput();
-    void ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples);
+    bool ReadAudio(std::vector<int16_t>& data, int sample_rate, int samples);
     void ResetDecoder();
     void SetDecodeSampleRate(int sample_rate, int frame_duration);
     void CheckNewVersion();
diff --git a/main/assets/common/popup.p3 b/main/assets/common/popup.p3
new file mode 100644
index 00000000..6bc3060f
Binary files /dev/null and b/main/assets/common/popup.p3 differ
diff --git a/main/audio_processing/afe_audio_processor.cc b/main/audio_processing/afe_audio_processor.cc
index 3415ff59..e456ae4f 100644
--- a/main/audio_processing/afe_audio_processor.cc
+++ b/main/audio_processing/afe_audio_processor.cc
@@ -3,7 +3,7 @@
 
 #define PROCESSOR_RUNNING 0x01
 
-static const char* TAG = "AfeAudioProcessor";
+#define TAG "AfeAudioProcessor"
 
 AfeAudioProcessor::AfeAudioProcessor()
     : afe_data_(nullptr) {
diff --git a/main/audio_processing/wake_word_detect.cc b/main/audio_processing/afe_wake_word.cc
similarity index 85%
rename from main/audio_processing/wake_word_detect.cc
rename to main/audio_processing/afe_wake_word.cc
index 0fce416a..af7bfa06 100644
--- a/main/audio_processing/wake_word_detect.cc
+++ b/main/audio_processing/afe_wake_word.cc
@@ -1,4 +1,4 @@
-#include "wake_word_detect.h"
+#include "afe_wake_word.h"
 #include "application.h"
 
 #include <esp_log.h>
@@ -8,9 +8,9 @@
 
 #define DETECTION_RUNNING_EVENT 1
 
-static const char* TAG = "WakeWordDetect";
+#define TAG "AfeWakeWord"
 
-WakeWordDetect::WakeWordDetect()
+AfeWakeWord::AfeWakeWord()
     : afe_data_(nullptr),
       wake_word_pcm_(),
       wake_word_opus_() {
@@ -18,7 +18,7 @@ WakeWordDetect::WakeWordDetect()
     event_group_ = xEventGroupCreate();
 }
 
-WakeWordDetect::~WakeWordDetect() {
+AfeWakeWord::~AfeWakeWord() {
     if (afe_data_ != nullptr) {
         afe_iface_->destroy(afe_data_);
     }
@@ -30,7 +30,7 @@ WakeWordDetect::~WakeWordDetect() {
     vEventGroupDelete(event_group_);
 }
 
-void WakeWordDetect::Initialize(AudioCodec* codec) {
+void AfeWakeWord::Initialize(AudioCodec* codec) {
     codec_ = codec;
     int ref_num = codec_->input_reference() ? 1 : 0;
 
@@ -67,46 +67,46 @@ void WakeWordDetect::Initialize(AudioCodec* codec) {
     afe_data_ = afe_iface_->create_from_config(afe_config);
 
     xTaskCreate([](void* arg) {
-        auto this_ = (WakeWordDetect*)arg;
+        auto this_ = static_cast<AfeWakeWord*>(arg);
         this_->AudioDetectionTask();
         vTaskDelete(NULL);
     }, "audio_detection", 4096, this, 3, nullptr);
 }
 
-void WakeWordDetect::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
+void AfeWakeWord::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
     wake_word_detected_callback_ = callback;
 }
 
-void WakeWordDetect::StartDetection() {
+void AfeWakeWord::StartDetection() {
     xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT);
 }
 
-void WakeWordDetect::StopDetection() {
+void AfeWakeWord::StopDetection() {
     xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);
     if (afe_data_ != nullptr) {
         afe_iface_->reset_buffer(afe_data_);
     }
 }
 
-bool WakeWordDetect::IsDetectionRunning() {
+bool AfeWakeWord::IsDetectionRunning() {
     return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT;
 }
 
-void WakeWordDetect::Feed(const std::vector<int16_t>& data) {
+void AfeWakeWord::Feed(const std::vector<int16_t>& data) {
     if (afe_data_ == nullptr) {
         return;
     }
     afe_iface_->feed(afe_data_, data.data());
 }
 
-size_t WakeWordDetect::GetFeedSize() {
+size_t AfeWakeWord::GetFeedSize() {
     if (afe_data_ == nullptr) {
         return 0;
     }
     return afe_iface_->get_feed_chunksize(afe_data_) * codec_->input_channels();
 }
 
-void WakeWordDetect::AudioDetectionTask() {
+void AfeWakeWord::AudioDetectionTask() {
     auto fetch_size = afe_iface_->get_fetch_chunksize(afe_data_);
     auto feed_size = afe_iface_->get_feed_chunksize(afe_data_);
     ESP_LOGI(TAG, "Audio detection task started, feed size: %d fetch size: %d",
@@ -121,7 +121,7 @@ void WakeWordDetect::AudioDetectionTask() {
         }
 
         // Store the wake word data for voice recognition, like who is speaking
-        StoreWakeWordData((uint16_t*)res->data, res->data_size / sizeof(uint16_t));
+        StoreWakeWordData(res->data, res->data_size / sizeof(int16_t));
 
         if (res->wakeup_state == WAKENET_DETECTED) {
             StopDetection();
@@ -134,7 +134,7 @@ void WakeWordDetect::AudioDetectionTask() {
     }
 }
 
-void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) {
+void AfeWakeWord::StoreWakeWordData(const int16_t* data, size_t samples) {
     // store audio data to wake_word_pcm_
     wake_word_pcm_.emplace_back(std::vector<int16_t>(data, data + samples));
     // keep about 2 seconds of data, detect duration is 30ms (sample_rate == 16000, chunksize == 512)
@@ -143,13 +143,13 @@ void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) {
     }
 }
 
-void WakeWordDetect::EncodeWakeWordData() {
+void AfeWakeWord::EncodeWakeWordData() {
     wake_word_opus_.clear();
     if (wake_word_encode_task_stack_ == nullptr) {
         wake_word_encode_task_stack_ = (StackType_t*)heap_caps_malloc(4096 * 8, MALLOC_CAP_SPIRAM);
     }
     wake_word_encode_task_ = xTaskCreateStatic([](void* arg) {
-        auto this_ = (WakeWordDetect*)arg;
+        auto this_ = static_cast<AfeWakeWord*>(arg);
         {
             auto start_time = esp_timer_get_time();
             auto encoder = std::make_unique<OpusEncoderWrapper>(16000, 1, OPUS_FRAME_DURATION_MS);
@@ -176,7 +176,7 @@ void WakeWordDetect::EncodeWakeWordData() {
     }, "encode_detect_packets", 4096 * 8, this, 2, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_);
 }
 
-bool WakeWordDetect::GetWakeWordOpus(std::vector<uint8_t>& opus) {
+bool AfeWakeWord::GetWakeWordOpus(std::vector<uint8_t>& opus) {
     std::unique_lock<std::mutex> lock(wake_word_mutex_);
     wake_word_cv_.wait(lock, [this]() {
         return !wake_word_opus_.empty();
diff --git a/main/audio_processing/wake_word_detect.h b/main/audio_processing/afe_wake_word.h
similarity index 87%
rename from main/audio_processing/wake_word_detect.h
rename to main/audio_processing/afe_wake_word.h
index 583b50c9..795a20b7 100644
--- a/main/audio_processing/wake_word_detect.h
+++ b/main/audio_processing/afe_wake_word.h
@@ -1,5 +1,5 @@
-#ifndef WAKE_WORD_DETECT_H
-#define WAKE_WORD_DETECT_H
+#ifndef AFE_WAKE_WORD_H
+#define AFE_WAKE_WORD_H
 
 #include <freertos/FreeRTOS.h>
 #include <freertos/task.h>
@@ -16,11 +16,12 @@
 #include <condition_variable>
 
 #include "audio_codec.h"
+#include "wake_word.h"
 
-class WakeWordDetect {
+class AfeWakeWord : public WakeWord {
 public:
-    WakeWordDetect();
-    ~WakeWordDetect();
+    AfeWakeWord();
+    ~AfeWakeWord();
 
     void Initialize(AudioCodec* codec);
     void Feed(const std::vector<int16_t>& data);
@@ -51,7 +52,7 @@ private:
     std::mutex wake_word_mutex_;
     std::condition_variable wake_word_cv_;
 
-    void StoreWakeWordData(uint16_t* data, size_t size);
+    void StoreWakeWordData(const int16_t* data, size_t size);
     void AudioDetectionTask();
 };
 
diff --git a/main/audio_processing/wake_word_no_afe.cc b/main/audio_processing/esp_wake_word.cc
similarity index 55%
rename from main/audio_processing/wake_word_no_afe.cc
rename to main/audio_processing/esp_wake_word.cc
index 4bffb020..a71b8f5a 100644
--- a/main/audio_processing/wake_word_no_afe.cc
+++ b/main/audio_processing/esp_wake_word.cc
@@ -1,4 +1,4 @@
-#include "wake_word_no_afe.h"
+#include "esp_wake_word.h"
 #include "application.h"
 
 #include <esp_log.h>
@@ -8,13 +8,13 @@
 
 #define DETECTION_RUNNING_EVENT 1
 
-static const char* TAG = "WakeWordDetect";
+#define TAG "EspWakeWord"
 
-WakeWordDetect::WakeWordDetect() {
+EspWakeWord::EspWakeWord() {
     event_group_ = xEventGroupCreate();
 }
 
-WakeWordDetect::~WakeWordDetect() {
+EspWakeWord::~EspWakeWord() {
     if (wakenet_data_ != nullptr) {
         wakenet_iface_->destroy(wakenet_data_);
         esp_srmodel_deinit(wakenet_model_);
@@ -23,13 +23,16 @@ WakeWordDetect::~WakeWordDetect() {
     vEventGroupDelete(event_group_);
 }
 
-void WakeWordDetect::Initialize(AudioCodec* codec) {
+void EspWakeWord::Initialize(AudioCodec* codec) {
     codec_ = codec;
 
     wakenet_model_ = esp_srmodel_init("model");
 
     if(wakenet_model_->num > 1) {
         ESP_LOGW(TAG, "More than one model found, using the first one");
+    } else if (wakenet_model_->num == 0) {
+        ESP_LOGE(TAG, "No model found");
+        return;
     }
     char *model_name = wakenet_model_->model_name[0];
     wakenet_iface_ = (esp_wn_iface_t*)esp_wn_handle_from_name(model_name);
@@ -40,28 +43,57 @@ void WakeWordDetect::Initialize(AudioCodec* codec) {
     ESP_LOGI(TAG, "Wake word(%s),freq: %d, chunksize: %d", model_name, frequency, audio_chunksize);
 }
 
-void WakeWordDetect::StartDetection() {
+// Registers the callback that Feed() invokes with the detected wake word's name.
+void EspWakeWord::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
+    wake_word_detected_callback_ = callback;
+}
+
+void EspWakeWord::StartDetection() {
+    ESP_LOGI(TAG, "Start wake word detection");
     xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT);
 }
 
-void WakeWordDetect::StopDetection() {
+void EspWakeWord::StopDetection() {
+    ESP_LOGI(TAG, "Stop wake word detection");
     xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);
 }
 
-bool WakeWordDetect::IsDetectionRunning() {
+bool EspWakeWord::IsDetectionRunning() {
     return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT;
 }
 
-void WakeWordDetect::Feed(const std::vector<int16_t>& data) {
+// Runs the wake word detector on one chunk of samples. On a hit, detection is
+// stopped, the word's name is stored and the registered callback (if any) runs.
+void EspWakeWord::Feed(const std::vector<int16_t>& data) {
+    // Initialize() returns before assigning wakenet_iface_ when no model is found,
+    // so there may be no engine to call into; same guard idea as GetFeedSize().
+    if (wakenet_iface_ == nullptr || wakenet_data_ == nullptr) {
+        return;
+    }
     int res = wakenet_iface_->detect(wakenet_data_, (int16_t *)data.data());
     if (res > 0) {
-        ESP_LOGI(TAG, "Wake word detected");
-        auto& app = Application::GetInstance();
-        app.ToggleChatState();
+        StopDetection();
+        last_detected_wake_word_ = wakenet_iface_->get_word_name(wakenet_data_, res);
+
+        if (wake_word_detected_callback_) {
+            wake_word_detected_callback_(last_detected_wake_word_);
+        }
     }
 }
 
-size_t WakeWordDetect::GetFeedSize() {
-
+// Detector chunk size multiplied by the codec's input channel count; 0 while no detector exists.
+size_t EspWakeWord::GetFeedSize() {
+    if (wakenet_data_ == nullptr) {
+        return 0;
+    }
     return wakenet_iface_->get_samp_chunksize(wakenet_data_) * codec_->input_channels();
 }
+
+// No-op in this implementation.
+void EspWakeWord::EncodeWakeWordData() {
+}
+
+// Always returns false and leaves `opus` untouched.
+bool EspWakeWord::GetWakeWordOpus(std::vector<uint8_t>& opus) {
+    return false;
+}
diff --git a/main/audio_processing/wake_word_no_afe.h b/main/audio_processing/esp_wake_word.h
similarity index 54%
rename from main/audio_processing/wake_word_no_afe.h
rename to main/audio_processing/esp_wake_word.h
index 90e1110d..189243c8 100644
--- a/main/audio_processing/wake_word_no_afe.h
+++ b/main/audio_processing/esp_wake_word.h
@@ -1,13 +1,13 @@
-#ifndef WAKE_WORD_DETECT_H
-#define WAKE_WORD_DETECT_H
+#ifndef ESP_WAKE_WORD_H
+#define ESP_WAKE_WORD_H
 
 #include <freertos/FreeRTOS.h>
 #include <freertos/task.h>
 #include <freertos/event_groups.h>
 
-#include "model_path.h"
-#include "esp_wn_iface.h"
-#include "esp_wn_models.h"
+#include <esp_wn_iface.h>
+#include <esp_wn_models.h>
+#include <model_path.h>
 
 #include <list>
 #include <string>
@@ -17,19 +17,23 @@
 #include <condition_variable>
 
 #include "audio_codec.h"
-#include <model_path.h>
+#include "wake_word.h"
 
-class WakeWordDetect {
+class EspWakeWord : public WakeWord {
 public:
-    WakeWordDetect();
-    ~WakeWordDetect();
+    EspWakeWord();
+    ~EspWakeWord();
 
     void Initialize(AudioCodec* codec);
     void Feed(const std::vector<int16_t>& data);
+    void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
     void StartDetection();
     void StopDetection();
     bool IsDetectionRunning();
     size_t GetFeedSize();
+    void EncodeWakeWordData();
+    bool GetWakeWordOpus(std::vector<uint8_t>& opus);
+    const std::string& GetLastDetectedWakeWord() const { return last_detected_wake_word_; }
 
 private:
     esp_wn_iface_t *wakenet_iface_ = nullptr;
@@ -37,6 +41,9 @@ private:
     srmodel_list_t *wakenet_model_ = nullptr;
     EventGroupHandle_t event_group_;
     AudioCodec* codec_ = nullptr;
+
+    std::function<void(const std::string& wake_word)> wake_word_detected_callback_;
+    std::string last_detected_wake_word_;
 };
 
 #endif
diff --git a/main/audio_processing/dummy_audio_processor.cc b/main/audio_processing/no_audio_processor.cc
similarity index 51%
rename from main/audio_processing/dummy_audio_processor.cc
rename to main/audio_processing/no_audio_processor.cc
index 7cb606d7..a84f8af1 100644
--- a/main/audio_processing/dummy_audio_processor.cc
+++ b/main/audio_processing/no_audio_processor.cc
@@ -1,13 +1,13 @@
-#include "dummy_audio_processor.h"
+#include "no_audio_processor.h"
 #include <esp_log.h>
 
-#define TAG "DummyAudioProcessor"
+#define TAG "NoAudioProcessor"
 
-void DummyAudioProcessor::Initialize(AudioCodec* codec) {
+void NoAudioProcessor::Initialize(AudioCodec* codec) {
     codec_ = codec;
 }
 
-void DummyAudioProcessor::Feed(const std::vector<int16_t>& data) {
+void NoAudioProcessor::Feed(const std::vector<int16_t>& data) {
     if (!is_running_ || !output_callback_) {
         return;
     }
@@ -15,27 +15,27 @@ void DummyAudioProcessor::Feed(const std::vector<int16_t>& data) {
     output_callback_(std::vector<int16_t>(data));
 }
 
-void DummyAudioProcessor::Start() {
+void NoAudioProcessor::Start() {
     is_running_ = true;
 }
 
-void DummyAudioProcessor::Stop() {
+void NoAudioProcessor::Stop() {
     is_running_ = false;
 }
 
-bool DummyAudioProcessor::IsRunning() {
+bool NoAudioProcessor::IsRunning() {
     return is_running_;
 }
 
-void DummyAudioProcessor::OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) {
+void NoAudioProcessor::OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) {
     output_callback_ = callback;
 }
 
-void DummyAudioProcessor::OnVadStateChange(std::function<void(bool speaking)> callback) {
+void NoAudioProcessor::OnVadStateChange(std::function<void(bool speaking)> callback) {
     vad_state_change_callback_ = callback;
 }
 
-size_t DummyAudioProcessor::GetFeedSize() {
+size_t NoAudioProcessor::GetFeedSize() {
     if (!codec_) {
         return 0;
     }
@@ -43,7 +43,7 @@ size_t DummyAudioProcessor::GetFeedSize() {
     return 30 * codec_->input_sample_rate() / 1000;
 }
 
-void DummyAudioProcessor::EnableDeviceAec(bool enable) {
+void NoAudioProcessor::EnableDeviceAec(bool enable) {
     if (enable) {
         ESP_LOGE(TAG, "Device AEC is not supported");
     }
diff --git a/main/audio_processing/dummy_audio_processor.h b/main/audio_processing/no_audio_processor.h
similarity index 86%
rename from main/audio_processing/dummy_audio_processor.h
rename to main/audio_processing/no_audio_processor.h
index 4383b7a9..ed54741a 100644
--- a/main/audio_processing/dummy_audio_processor.h
+++ b/main/audio_processing/no_audio_processor.h
@@ -7,10 +7,10 @@
 #include "audio_processor.h"
 #include "audio_codec.h"
 
-class DummyAudioProcessor : public AudioProcessor {
+class NoAudioProcessor : public AudioProcessor {
 public:
-    DummyAudioProcessor() = default;
-    ~DummyAudioProcessor() = default;
+    NoAudioProcessor() = default;
+    ~NoAudioProcessor() = default;
 
     void Initialize(AudioCodec* codec) override;
     void Feed(const std::vector<int16_t>& data) override;
diff --git a/main/audio_processing/no_wake_word.cc b/main/audio_processing/no_wake_word.cc
new file mode 100644
index 00000000..34a85543
--- /dev/null
+++ b/main/audio_processing/no_wake_word.cc
@@ -0,0 +1,54 @@
+#include "no_wake_word.h"
+#include <esp_log.h>
+
+#define TAG "NoWakeWord"
+
+// NoWakeWord is the build's WakeWord implementation when neither the AFE nor
+// the ESP wake word engine is enabled: every operation below is a stub.
+
+// Keeps the codec pointer; there is no detector to set up.
+void NoWakeWord::Initialize(AudioCodec* codec) {
+    codec_ = codec;
+}
+
+void NoWakeWord::Feed(const std::vector<int16_t>& data) {
+    // Do nothing - no wake word processing
+}
+
+void NoWakeWord::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
+    // Do nothing - the callback is not stored because nothing here detects a wake word
+}
+
+void NoWakeWord::StartDetection() {
+    // Do nothing - no wake word processing
+}
+
+void NoWakeWord::StopDetection() {
+    // Do nothing - no wake word processing
+}
+
+bool NoWakeWord::IsDetectionRunning() {
+    return false;  // No wake word processing
+}
+
+size_t NoWakeWord::GetFeedSize() {
+    return 0;  // No specific feed size requirement
+}
+
+void NoWakeWord::EncodeWakeWordData() {
+    // Do nothing - no encoding needed
+}
+
+// Empties `opus` and reports that no packet is available.
+bool NoWakeWord::GetWakeWordOpus(std::vector<uint8_t>& opus) {
+    opus.clear();
+    return false;  // No opus data available
+}
+
+// Returns an empty name: no wake word is ever detected.
+const std::string& NoWakeWord::GetLastDetectedWakeWord() const {
+    // The return type is a reference, so the empty string needs static storage;
+    // `return "";` would hand back a reference to a temporary destroyed on return.
+    static const std::string kNoWakeWord;
+    return kNoWakeWord;
+}
\ No newline at end of file
diff --git a/main/audio_processing/no_wake_word.h b/main/audio_processing/no_wake_word.h
new file mode 100644
index 00000000..c367595b
--- /dev/null
+++ b/main/audio_processing/no_wake_word.h
@@ -0,0 +1,31 @@
+#ifndef NO_WAKE_WORD_H
+#define NO_WAKE_WORD_H
+
+#include <vector>
+#include <functional>
+#include <string>
+
+#include "wake_word.h"
+#include "audio_codec.h"
+
+class NoWakeWord : public WakeWord {  // WakeWord implementation that performs no detection
+public:
+    NoWakeWord() = default;
+    ~NoWakeWord() = default;
+
+    void Initialize(AudioCodec* codec) override;  // Only stores codec in codec_
+    void Feed(const std::vector<int16_t>& data) override;  // Ignores the samples
+    void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) override;  // Ignores the callback
+    void StartDetection() override;  // No-op
+    void StopDetection() override;  // No-op
+    bool IsDetectionRunning() override;  // Always false
+    size_t GetFeedSize() override;  // Always 0
+    void EncodeWakeWordData() override;  // No-op
+    bool GetWakeWordOpus(std::vector<uint8_t>& opus) override;  // Clears opus and returns false
+    const std::string& GetLastDetectedWakeWord() const override;  // Meant to yield an empty name; NOTE(review): returns by reference, so the definition must not return a temporary
+
+private:
+    AudioCodec* codec_ = nullptr;  // Set by Initialize(); not read by any method of this class
+};
+
+#endif 
\ No newline at end of file
diff --git a/main/audio_processing/wake_word.h b/main/audio_processing/wake_word.h
new file mode 100644
index 00000000..395f96cd
--- /dev/null
+++ b/main/audio_processing/wake_word.h
@@ -0,0 +1,26 @@
+#ifndef WAKE_WORD_H
+#define WAKE_WORD_H
+
+#include <string>
+#include <vector>
+#include <functional>
+
+#include "audio_codec.h"
+
+class WakeWord {  // Abstract interface implemented by AfeWakeWord, EspWakeWord and NoWakeWord
+public:
+    virtual ~WakeWord() = default;  // Virtual so deleting through a WakeWord pointer runs the derived destructor
+    
+    virtual void Initialize(AudioCodec* codec) = 0;  // Supplies the audio codec; implementations keep the pointer
+    virtual void Feed(const std::vector<int16_t>& data) = 0;  // Hands one chunk of int16 samples to the detector
+    virtual void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) = 0;  // Sets the callback that receives the detected word's name
+    virtual void StartDetection() = 0;  // Turns detection on
+    virtual void StopDetection() = 0;  // Turns detection off
+    virtual bool IsDetectionRunning() = 0;  // Whether detection is currently on
+    virtual size_t GetFeedSize() = 0;  // Chunk size for Feed(); implementations return 0 when they have no detector
+    virtual void EncodeWakeWordData() = 0;  // Encodes captured wake word audio; a no-op in EspWakeWord and NoWakeWord
+    virtual bool GetWakeWordOpus(std::vector<uint8_t>& opus) = 0;  // Fetches encoded wake word audio into opus; false when none is available
+    virtual const std::string& GetLastDetectedWakeWord() const = 0;  // Name of the most recently detected wake word
+};
+
+#endif
diff --git a/main/boards/esp-hi/config.json b/main/boards/esp-hi/config.json
index bd31a9c3..f432c446 100644
--- a/main/boards/esp-hi/config.json
+++ b/main/boards/esp-hi/config.json
@@ -30,7 +30,8 @@
                 "CONFIG_MBEDTLS_DYNAMIC_FREE_CONFIG_DATA=y",
                 "CONFIG_NEWLIB_NANO_FORMAT=y",
                 "CONFIG_MMAP_FILE_NAME_LENGTH=25",
-                "CONFIG_ESP_CONSOLE_NONE=y"
+                "CONFIG_ESP_CONSOLE_NONE=y",
+                "CONFIG_IOT_PROTOCOL_XIAOZHI=y"
             ]
         }
     ]
diff --git a/main/boards/genjutech-s3-1.54tft/genjutech-s3-1.54tft.cc b/main/boards/genjutech-s3-1.54tft/genjutech-s3-1.54tft.cc
index 629a34c1..85b24500 100644
--- a/main/boards/genjutech-s3-1.54tft/genjutech-s3-1.54tft.cc
+++ b/main/boards/genjutech-s3-1.54tft/genjutech-s3-1.54tft.cc
@@ -70,7 +70,7 @@ private:
     }
 
     void InitializePowerSaveTimer() {
-        power_save_timer_ = new PowerSaveTimer(160, 60);
+        power_save_timer_ = new PowerSaveTimer(240, 60);
         power_save_timer_->OnEnterSleepMode([this]() {
             ESP_LOGI(TAG, "Enabling sleep mode");
             auto display = GetDisplay();
diff --git a/main/boards/lichuang-c3-dev/config.json b/main/boards/lichuang-c3-dev/config.json
index cdc508f1..d27ae460 100644
--- a/main/boards/lichuang-c3-dev/config.json
+++ b/main/boards/lichuang-c3-dev/config.json
@@ -5,7 +5,9 @@
             "name": "lichuang-c3-dev",
             "sdkconfig_append": [
                 "CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y",
-                "CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=\"partitions_8M.csv\""
+                "CONFIG_PARTITION_TABLE_CUSTOM_FILENAME=\"partitions_8M.csv\"",
+                "CONFIG_ESP_WIFI_ENTERPRISE_SUPPORT=n",
+                "CONFIG_LWIP_IPV6=n"
             ]
         }
     ]
diff --git a/main/boards/magiclick-c3-v2/config.json b/main/boards/magiclick-c3-v2/config.json
index f3eeb8f6..4503ebdb 100644
--- a/main/boards/magiclick-c3-v2/config.json
+++ b/main/boards/magiclick-c3-v2/config.json
@@ -5,7 +5,8 @@
             "name": "magiclick-c3-v2",
             "sdkconfig_append": [
                 "CONFIG_PM_ENABLE=y",
-                "CONFIG_FREERTOS_USE_TICKLESS_IDLE=y"
+                "CONFIG_FREERTOS_USE_TICKLESS_IDLE=y",
+                "CONFIG_USE_ESP_WAKE_WORD=n"
             ]
         }
     ]
diff --git a/main/boards/magiclick-c3/config.json b/main/boards/magiclick-c3/config.json
index 09eb3fdf..34d1471d 100644
--- a/main/boards/magiclick-c3/config.json
+++ b/main/boards/magiclick-c3/config.json
@@ -5,7 +5,8 @@
             "name": "magiclick-c3",
             "sdkconfig_append": [
                 "CONFIG_PM_ENABLE=y",
-                "CONFIG_FREERTOS_USE_TICKLESS_IDLE=y"
+                "CONFIG_FREERTOS_USE_TICKLESS_IDLE=y",
+                "CONFIG_USE_ESP_WAKE_WORD=n"
             ]
         }
     ]
diff --git a/main/boards/xmini-c3/config.json b/main/boards/xmini-c3/config.json
index d6d2796c..d497d740 100644
--- a/main/boards/xmini-c3/config.json
+++ b/main/boards/xmini-c3/config.json
@@ -5,7 +5,8 @@
             "name": "xmini-c3",
             "sdkconfig_append": [
                 "CONFIG_PM_ENABLE=y",
-                "CONFIG_FREERTOS_USE_TICKLESS_IDLE=y"
+                "CONFIG_FREERTOS_USE_TICKLESS_IDLE=y",
+                "CONFIG_USE_ESP_WAKE_WORD=y"
             ]
         }
     ]
diff --git a/main/boards/xmini-c3/xmini_c3_board.cc b/main/boards/xmini-c3/xmini_c3_board.cc
index 2c3011a2..847b8754 100644
--- a/main/boards/xmini-c3/xmini_c3_board.cc
+++ b/main/boards/xmini-c3/xmini_c3_board.cc
@@ -30,10 +30,10 @@ private:
     Display* display_ = nullptr;
     Button boot_button_;
     bool press_to_talk_enabled_ = false;
-    PowerSaveTimer* power_save_timer_;
+    PowerSaveTimer* power_save_timer_ = nullptr;
 
     void InitializePowerSaveTimer() {
-        power_save_timer_ = new PowerSaveTimer(160, 60);
+        power_save_timer_ = new PowerSaveTimer(160, 600);
         power_save_timer_->OnEnterSleepMode([this]() {
             ESP_LOGI(TAG, "Enabling sleep mode");
             auto display = GetDisplay();
@@ -130,7 +130,9 @@ private:
             }
         });
         boot_button_.OnPressDown([this]() {
-            power_save_timer_->WakeUp();
+            if (power_save_timer_) {
+                power_save_timer_->WakeUp();
+            }
             if (press_to_talk_enabled_) {
                 Application::GetInstance().StartListening();
             }
diff --git a/main/protocols/mqtt_protocol.cc b/main/protocols/mqtt_protocol.cc
index bf96b12d..6cf59bbb 100644
--- a/main/protocols/mqtt_protocol.cc
+++ b/main/protocols/mqtt_protocol.cc
@@ -227,6 +227,8 @@ bool MqttProtocol::OpenAudioChannel() {
         auto nonce = (uint8_t*)data.data();
         auto encrypted = (uint8_t*)data.data() + aes_nonce_.size();
         AudioStreamPacket packet;
+        packet.sample_rate = server_sample_rate_;
+        packet.frame_duration = server_frame_duration_;
         packet.timestamp = timestamp;
         packet.payload.resize(decrypted_size);
         int ret = mbedtls_aes_crypt_ctr(&aes_ctx_, decrypted_size, &nc_off, nonce, stream_block, encrypted, (uint8_t*)packet.payload.data());
diff --git a/main/protocols/protocol.h b/main/protocols/protocol.h
index e23561ee..31f1ac43 100644
--- a/main/protocols/protocol.h
+++ b/main/protocols/protocol.h
@@ -8,6 +8,8 @@
 #include <vector>
 
 struct AudioStreamPacket {
+    int sample_rate = 0;  // Filled from the protocol's server_sample_rate_ on incoming audio; 0 when unset
+    int frame_duration = 0;  // Filled from the protocol's server_frame_duration_ on incoming audio; 0 when unset
     uint32_t timestamp = 0;
     std::vector<uint8_t> payload;
 };
diff --git a/main/protocols/websocket_protocol.cc b/main/protocols/websocket_protocol.cc
index 45c00957..2d5c7840 100644
--- a/main/protocols/websocket_protocol.cc
+++ b/main/protocols/websocket_protocol.cc
@@ -124,6 +124,8 @@ bool WebsocketProtocol::OpenAudioChannel() {
                     bp2->payload_size = ntohl(bp2->payload_size);
                     auto payload = (uint8_t*)bp2->payload;
                     on_incoming_audio_(AudioStreamPacket{
+                        .sample_rate = server_sample_rate_,
+                        .frame_duration = server_frame_duration_,
                         .timestamp = bp2->timestamp,
                         .payload = std::vector<uint8_t>(payload, payload + bp2->payload_size)
                     });
@@ -133,11 +135,15 @@ bool WebsocketProtocol::OpenAudioChannel() {
                     bp3->payload_size = ntohs(bp3->payload_size);
                     auto payload = (uint8_t*)bp3->payload;
                     on_incoming_audio_(AudioStreamPacket{
+                        .sample_rate = server_sample_rate_,
+                        .frame_duration = server_frame_duration_,
                         .timestamp = 0,
                         .payload = std::vector<uint8_t>(payload, payload + bp3->payload_size)
                     });
                 } else {
                     on_incoming_audio_(AudioStreamPacket{
+                        .sample_rate = server_sample_rate_,
+                        .frame_duration = server_frame_duration_,
                         .timestamp = 0,
                         .payload = std::vector<uint8_t>((uint8_t*)data, (uint8_t*)data + len)
                     });
diff --git a/sdkconfig.defaults.esp32c3 b/sdkconfig.defaults.esp32c3
index e3ac6c78..725e561b 100644
--- a/sdkconfig.defaults.esp32c3
+++ b/sdkconfig.defaults.esp32c3
@@ -1,2 +1,3 @@
 
 CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y
+CONFIG_SR_WN_WN9S_NIHAOXIAOZHI=y