diff --git a/CMakeLists.txt b/CMakeLists.txt index 32efa58d..f994f09d 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ # CMakeLists in this exact order for cmake to work correctly cmake_minimum_required(VERSION 3.16) -set(PROJECT_VER "0.6.0") +set(PROJECT_VER "0.6.1") include($ENV{IDF_PATH}/tools/cmake/project.cmake) project(xiaozhi) diff --git a/main/Application.cc b/main/Application.cc index e67e01fa..a025ffd0 100644 --- a/main/Application.cc +++ b/main/Application.cc @@ -230,7 +230,7 @@ void Application::Start() { SetChatState(kChatStateIdle); } } else if (chat_state_ == kChatStateSpeaking) { - break_speaking_ = true; + AbortSpeaking(); } // Resume detection @@ -272,7 +272,7 @@ void Application::Start() { SetChatState(kChatStateIdle); } } else if (chat_state_ == kChatStateSpeaking) { - break_speaking_ = true; + AbortSpeaking(); } else if (chat_state_ == kChatStateListening) { if (ws_client_ && ws_client_->IsConnected()) { ws_client_->Close(); @@ -356,6 +356,22 @@ void Application::MainLoop() { } } +void Application::AbortSpeaking() { + ESP_LOGI(TAG, "Abort speaking"); + skip_to_end_ = true; + + if (ws_client_ && ws_client_->IsConnected()) { + cJSON* root = cJSON_CreateObject(); + cJSON_AddStringToObject(root, "type", "abort"); + char* json = cJSON_PrintUnformatted(root); + + std::lock_guard lock(mutex_); + ws_client_->Send(json); + cJSON_Delete(root); + free(json); + } +} + void Application::SetChatState(ChatState state) { const char* state_str[] = { "unknown", @@ -368,6 +384,10 @@ void Application::SetChatState(ChatState state) { "upgrading", "invalid_state" }; + if (chat_state_ == state) { + // No need to update the state + return; + } chat_state_ = state; ESP_LOGI(TAG, "STATE: %s", state_str[chat_state_]); @@ -488,16 +508,6 @@ void Application::HandleAudioPacket(AudioPacket* packet) { // This will block until the audio device has finished playing the audio audio_device_->OutputData(packet->pcm); - - if (break_speaking_) { - skip_to_end_ = true; - - // Play a silence and skip to the end - int frame_size = opus_decode_sample_rate_ / 1000 * opus_duration_ms_; - std::vector silence(frame_size); - bzero(silence.data(), silence.size() * sizeof(int16_t)); - audio_device_->OutputData(silence); - } break; } case kAudioPacketTypeStart: @@ -520,6 +530,9 @@ void Application::HandleAudioPacket(AudioPacket* packet) { ESP_LOGI(TAG, "<< %s", packet->text.c_str()); break; case kAudioPacketTypeSentenceEnd: + if (break_speaking_) { + skip_to_end_ = true; + } break; default: ESP_LOGI(TAG, "Unknown packet type: %d", packet->type); @@ -622,8 +635,7 @@ void Application::StartWebSocketClient() { SetDecodeSampleRate(sample_rate->valueint); } - // If the device is speaking, we need to break the speaking - break_speaking_ = true; + // If the device is speaking, we need to skip the last session skip_to_end_ = true; } else if (strcmp(state->valuestring, "stop") == 0) { packet->type = kAudioPacketTypeStop; diff --git a/main/Application.h b/main/Application.h index e0517c11..e35ec85b 100644 --- a/main/Application.h +++ b/main/Application.h @@ -77,7 +77,7 @@ public: void Schedule(std::function callback); void SetChatState(ChatState state); void Alert(const std::string&& title, const std::string&& message); - + void AbortSpeaking(); // 删除拷贝构造函数和赋值运算符 Application(const Application&) = delete; Application& operator=(const Application&) = delete; diff --git a/main/AudioDevice.cc b/main/AudioDevice.cc index 417f0863..8627e3fc 100644 --- a/main/AudioDevice.cc +++ b/main/AudioDevice.cc @@ -95,7 +95,7 @@ void AudioDevice::CreateSimplexChannels() { .role = I2S_ROLE_MASTER, .dma_desc_num = 6, .dma_frame_num = 240, - .auto_clear_after_cb = false, + .auto_clear_after_cb = true, .auto_clear_before_cb = false, .intr_priority = 0, };