From 0c57df1cd82d6cc93b50b2c69ab4f39d1d8eb0de Mon Sep 17 00:00:00 2001 From: Terrence Date: Mon, 26 May 2025 14:30:44 +0800 Subject: [PATCH] v1.6.5: Improve performance and memory usage --- CMakeLists.txt | 2 +- main/application.cc | 36 +++++++++++++---------- main/application.h | 5 ++-- main/audio_processing/wake_word_detect.cc | 6 ++-- main/idf_component.yml | 2 +- main/ota.cc | 2 +- main/protocols/mqtt_protocol.cc | 5 +--- main/protocols/protocol.cc | 5 ---- main/protocols/protocol.h | 2 -- main/protocols/websocket_protocol.cc | 7 ----- main/system_info.cc | 13 +++++++- main/system_info.h | 4 ++- sdkconfig.defaults | 6 ++++ 13 files changed, 50 insertions(+), 45 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6cfa79c7..8ebcd8f6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ # CMakeLists in this exact order for cmake to work correctly cmake_minimum_required(VERSION 3.16) -set(PROJECT_VER "1.6.4") +set(PROJECT_VER "1.6.5") # Add this line to disable the specific warning add_compile_options(-Wno-missing-field-initializers) diff --git a/main/application.cc b/main/application.cc index d0560aa3..aff6dd19 100644 --- a/main/application.cc +++ b/main/application.cc @@ -139,7 +139,7 @@ void Application::CheckNewVersion() { ota_.StartUpgrade([display](int progress, size_t speed) { char buffer[64]; - snprintf(buffer, sizeof(buffer), "%d%% %zuKB/s", progress, speed / 1024); + snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024); display->SetChatMessage("system", buffer); }); @@ -365,8 +365,8 @@ void Application::Start() { ESP_LOGI(TAG, "ML307 board detected, setting opus encoder complexity to 5"); opus_encoder_->SetComplexity(5); } else { - ESP_LOGI(TAG, "WiFi board detected, setting opus encoder complexity to 3"); - opus_encoder_->SetComplexity(3); + ESP_LOGI(TAG, "WiFi board detected, setting opus encoder complexity to 0"); + opus_encoder_->SetComplexity(0); } if (codec->input_sample_rate() != 16000) { @@ -418,9 +418,8 @@ void Application::Start() { Alert(Lang::Strings::ERROR, message.c_str(), "sad", Lang::Sounds::P3_EXCLAMATION); }); protocol_->OnIncomingAudio([this](AudioStreamPacket&& packet) { - const int max_packets_in_queue = 600 / OPUS_FRAME_DURATION_MS; std::lock_guard lock(mutex_); - if (audio_decode_queue_.size() < max_packets_in_queue) { + if (audio_decode_queue_.size() < MAX_AUDIO_PACKETS_IN_QUEUE) { audio_decode_queue_.emplace_back(std::move(packet)); } }); @@ -544,10 +543,17 @@ void Application::Start() { audio_processor_->Initialize(codec); audio_processor_->OnOutput([this](std::vector&& data) { - background_task_->Schedule([this, data = std::move(data)]() mutable { - if (protocol_->IsAudioChannelBusy()) { + { + std::lock_guard lock(mutex_); + // We do not have a send queue yet, but all packets are sent by the main task + // so we use the main task queue to limit the number of packets + if (main_tasks_.size() > MAX_AUDIO_PACKETS_IN_QUEUE) { + ESP_LOGW(TAG, "Too many main tasks = %u, skip sending audio...", main_tasks_.size()); return; } + } + + background_task_->Schedule([this, data = std::move(data)]() mutable { opus_encoder_->Encode(std::move(data), [this](std::vector&& opus) { AudioStreamPacket packet; packet.payload = std::move(opus); @@ -631,6 +637,9 @@ void Application::Start() { ResetDecoder(); PlaySound(Lang::Sounds::P3_SUCCESS); } + + // Print heap stats + SystemInfo::PrintHeapStats(); // Enter the main event loop MainEventLoop(); @@ -644,14 +653,9 @@ void Application::OnClockTimer() { // Print the debug info every 10 seconds if (clock_ticks_ % 10 == 0) { - // char buffer[500]; - // vTaskList(buffer); - // ESP_LOGI(TAG, "Task list: \n%s", buffer); - // SystemInfo::PrintRealTimeStats(pdMS_TO_TICKS(1000)); - - int free_sram = heap_caps_get_free_size(MALLOC_CAP_INTERNAL); - int min_free_sram = heap_caps_get_minimum_free_size(MALLOC_CAP_INTERNAL); - ESP_LOGI(TAG, "Free internal: %u minimal internal: %u", free_sram, min_free_sram); + // SystemInfo::PrintTaskCpuUsage(pdMS_TO_TICKS(1000)); + // SystemInfo::PrintTaskList(); + SystemInfo::PrintHeapStats(); // If we have synchronized server time, set the status to clock "HH:MM" if the device is idle if (ota_.HasServerTime()) { @@ -884,7 +888,7 @@ void Application::SetDeviceState(DeviceState state) { if (!audio_processor_->IsRunning()) { // Send the start listening command protocol_->SendStartListening(listening_mode_); - if (listening_mode_ == kListeningModeAutoStop && previous_state == kDeviceStateSpeaking) { + if (previous_state == kDeviceStateSpeaking) { // FIXME: Wait for the speaker to empty the buffer vTaskDelay(pdMS_TO_TICKS(120)); } diff --git a/main/application.h b/main/application.h index 6914ecf0..e10ae8a7 100644 --- a/main/application.h +++ b/main/application.h @@ -27,9 +27,7 @@ #endif #define SCHEDULE_EVENT (1 << 0) -#define AUDIO_INPUT_READY_EVENT (1 << 1) -#define AUDIO_OUTPUT_READY_EVENT (1 << 2) -#define CHECK_NEW_VERSION_DONE_EVENT (1 << 3) +#define CHECK_NEW_VERSION_DONE_EVENT (1 << 2) enum DeviceState { kDeviceStateUnknown, @@ -45,6 +43,7 @@ enum DeviceState { }; #define OPUS_FRAME_DURATION_MS 60 +#define MAX_AUDIO_PACKETS_IN_QUEUE (2400 / OPUS_FRAME_DURATION_MS) class Application { public: diff --git a/main/audio_processing/wake_word_detect.cc b/main/audio_processing/wake_word_detect.cc index f623eb37..0fce416a 100644 --- a/main/audio_processing/wake_word_detect.cc +++ b/main/audio_processing/wake_word_detect.cc @@ -137,8 +137,8 @@ void WakeWordDetect::AudioDetectionTask() { void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) { // store audio data to wake_word_pcm_ wake_word_pcm_.emplace_back(std::vector(data, data + samples)); - // keep about 2 seconds of data, detect duration is 32ms (sample_rate == 16000, chunksize == 512) - while (wake_word_pcm_.size() > 2000 / 32) { + // keep about 2 seconds of data, detect duration is 30ms (sample_rate == 16000, chunksize == 512) + while (wake_word_pcm_.size() > 2000 / 30) { wake_word_pcm_.pop_front(); } } @@ -165,7 +165,7 @@ void WakeWordDetect::EncodeWakeWordData() { this_->wake_word_pcm_.clear(); auto end_time = esp_timer_get_time(); - ESP_LOGI(TAG, "Encode wake word opus %zu packets in %lld ms", + ESP_LOGI(TAG, "Encode wake word opus %u packets in %lld ms", this_->wake_word_opus_.size(), (end_time - start_time) / 1000); std::lock_guard lock(this_->wake_word_mutex_); diff --git a/main/idf_component.yml b/main/idf_component.yml index ded1ebd6..22f3172f 100644 --- a/main/idf_component.yml +++ b/main/idf_component.yml @@ -11,7 +11,7 @@ dependencies: 78/esp_lcd_nv3023: ~1.0.0 78/esp-wifi-connect: ~2.4.2 78/esp-opus-encoder: ~2.3.2 - 78/esp-ml307: ~2.1.0 + 78/esp-ml307: ~2.1.2 78/xiaozhi-fonts: ~1.3.2 espressif/led_strip: ^2.5.5 espressif/esp_codec_dev: ~1.3.2 diff --git a/main/ota.cc b/main/ota.cc index 623e44d0..9aa0876e 100644 --- a/main/ota.cc +++ b/main/ota.cc @@ -285,7 +285,7 @@ void Ota::Upgrade(const std::string& firmware_url) { total_read += ret; if (esp_timer_get_time() - last_calc_time >= 1000000 || ret == 0) { size_t progress = total_read * 100 / content_length; - ESP_LOGI(TAG, "Progress: %zu%% (%zu/%zu), Speed: %zuB/s", progress, total_read, content_length, recent_read); + ESP_LOGI(TAG, "Progress: %u%% (%u/%u), Speed: %uB/s", progress, total_read, content_length, recent_read); if (upgrade_callback_) { upgrade_callback_(progress, recent_read); } diff --git a/main/protocols/mqtt_protocol.cc b/main/protocols/mqtt_protocol.cc index 89284dc7..d8b12b59 100644 --- a/main/protocols/mqtt_protocol.cc +++ b/main/protocols/mqtt_protocol.cc @@ -144,9 +144,7 @@ void MqttProtocol::SendAudio(const AudioStreamPacket& packet) { return; } - busy_sending_audio_ = true; udp_->Send(encrypted); - busy_sending_audio_ = false; } void MqttProtocol::CloseAudioChannel() { @@ -177,7 +175,6 @@ bool MqttProtocol::OpenAudioChannel() { } } - busy_sending_audio_ = false; error_occurred_ = false; session_id_ = ""; xEventGroupClearBits(event_group_handle_, MQTT_PROTOCOL_SERVER_HELLO_EVENT); @@ -207,7 +204,7 @@ bool MqttProtocol::OpenAudioChannel() { * |payload payload_len| */ if (data.size() < sizeof(aes_nonce_)) { - ESP_LOGE(TAG, "Invalid audio packet size: %zu", data.size()); + ESP_LOGE(TAG, "Invalid audio packet size: %u", data.size()); return; } if (data[0] != 0x01) { diff --git a/main/protocols/protocol.cc b/main/protocols/protocol.cc index cc35b698..d5c1cb5e 100644 --- a/main/protocols/protocol.cc +++ b/main/protocols/protocol.cc @@ -130,8 +130,3 @@ bool Protocol::IsTimeout() const { } return timeout; } - -bool Protocol::IsAudioChannelBusy() const { - return busy_sending_audio_; -} - diff --git a/main/protocols/protocol.h b/main/protocols/protocol.h index c08802e6..210d6567 100644 --- a/main/protocols/protocol.h +++ b/main/protocols/protocol.h @@ -63,7 +63,6 @@ public: virtual bool OpenAudioChannel() = 0; virtual void CloseAudioChannel() = 0; virtual bool IsAudioChannelOpened() const = 0; - virtual bool IsAudioChannelBusy() const; virtual void SendAudio(const AudioStreamPacket& packet) = 0; virtual void SendWakeWordDetected(const std::string& wake_word); virtual void SendStartListening(ListeningMode mode); @@ -83,7 +82,6 @@ protected: int server_sample_rate_ = 24000; int server_frame_duration_ = 60; bool error_occurred_ = false; - bool busy_sending_audio_ = false; std::string session_id_; std::chrono::time_point last_incoming_time_; diff --git a/main/protocols/websocket_protocol.cc b/main/protocols/websocket_protocol.cc index 6cf76504..10aa4e9c 100644 --- a/main/protocols/websocket_protocol.cc +++ b/main/protocols/websocket_protocol.cc @@ -44,9 +44,7 @@ void WebsocketProtocol::SendAudio(const AudioStreamPacket& packet) { bp2->payload_size = htonl(packet.payload.size()); memcpy(bp2->payload, packet.payload.data(), packet.payload.size()); - busy_sending_audio_ = true; websocket_->Send(serialized.data(), serialized.size(), true); - busy_sending_audio_ = false; } else if (version_ == 3) { std::string serialized; serialized.resize(sizeof(BinaryProtocol3) + packet.payload.size()); @@ -56,13 +54,9 @@ void WebsocketProtocol::SendAudio(const AudioStreamPacket& packet) { bp3->payload_size = htons(packet.payload.size()); memcpy(bp3->payload, packet.payload.data(), packet.payload.size()); - busy_sending_audio_ = true; websocket_->Send(serialized.data(), serialized.size(), true); - busy_sending_audio_ = false; } else { - busy_sending_audio_ = true; websocket_->Send(packet.payload.data(), packet.payload.size(), true); - busy_sending_audio_ = false; } } @@ -104,7 +98,6 @@ bool WebsocketProtocol::OpenAudioChannel() { version_ = version; } - busy_sending_audio_ = false; error_occurred_ = false; websocket_ = Board::GetInstance().CreateWebSocket(); diff --git a/main/system_info.cc b/main/system_info.cc index 7f0ebb1c..769362da 100644 --- a/main/system_info.cc +++ b/main/system_info.cc @@ -47,7 +47,7 @@ std::string SystemInfo::GetChipModelName() { return std::string(CONFIG_IDF_TARGET); } -esp_err_t SystemInfo::PrintRealTimeStats(TickType_t xTicksToWait) { +esp_err_t SystemInfo::PrintTaskCpuUsage(TickType_t xTicksToWait) { #define ARRAY_SIZE_OFFSET 5 TaskStatus_t *start_array = NULL, *end_array = NULL; UBaseType_t start_array_size, end_array_size; @@ -132,3 +132,14 @@ exit: //Common return path return ret; } +void SystemInfo::PrintTaskList() { + char buffer[500]; + vTaskList(buffer); + ESP_LOGI(TAG, "Task list: \n%s", buffer); +} + +void SystemInfo::PrintHeapStats() { + int free_sram = heap_caps_get_free_size(MALLOC_CAP_INTERNAL); + int min_free_sram = heap_caps_get_minimum_free_size(MALLOC_CAP_INTERNAL); + ESP_LOGI(TAG, "free sram: %u minimal sram: %u", free_sram, min_free_sram); +} diff --git a/main/system_info.h b/main/system_info.h index 54d2c3e4..b29c72fb 100644 --- a/main/system_info.h +++ b/main/system_info.h @@ -13,7 +13,9 @@ public: static size_t GetFreeHeapSize(); static std::string GetMacAddress(); static std::string GetChipModelName(); - static esp_err_t PrintRealTimeStats(TickType_t xTicksToWait); + static esp_err_t PrintTaskCpuUsage(TickType_t xTicksToWait); + static void PrintTaskList(); + static void PrintHeapStats(); }; #endif // _SYSTEM_INFO_H_ diff --git a/sdkconfig.defaults b/sdkconfig.defaults index 575592c7..ded596dd 100644 --- a/sdkconfig.defaults +++ b/sdkconfig.defaults @@ -24,6 +24,12 @@ CONFIG_ESP_WIFI_IRAM_OPT=n CONFIG_ESP_WIFI_RX_IRAM_OPT=n CONFIG_ESP_WIFI_DYNAMIC_RX_MGMT_BUFFER=y +# These entries are copied from ESP-HI (ESP32C3) to reduce memory usage +CONFIG_ESP_WIFI_STATIC_RX_BUFFER_NUM=6 +CONFIG_ESP_WIFI_DYNAMIC_RX_BUFFER_NUM=8 +CONFIG_NEWLIB_NANO_FORMAT=y +CONFIG_ENTERPRISE_SUPPORT=n + CONFIG_CODEC_I2C_BACKWARD_COMPATIBLE=n # Fix ML307 FIFO Overflow