v1.6.5: Improve performance and memory usage

2025-05-26 14:30:44 +08:00
parent 277f87ae5f
commit 0c57df1cd8
13 changed files with 50 additions and 45 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@
 # CMakeLists in this exact order for cmake to work correctly
 cmake_minimum_required(VERSION 3.16)
-set(PROJECT_VER "1.6.4")
+set(PROJECT_VER "1.6.5")
 # Add this line to disable the specific warning
 add_compile_options(-Wno-missing-field-initializers)
--- a/main/application.cc
+++ b/main/application.cc
@@ -139,7 +139,7 @@ void Application::CheckNewVersion() {
            ota_.StartUpgrade([display](int progress, size_t speed) {
                char buffer[64];
-                snprintf(buffer, sizeof(buffer), "%d%% %zuKB/s", progress, speed / 1024);
+                snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024);
                display->SetChatMessage("system", buffer);
            });
@@ -365,8 +365,8 @@ void Application::Start() {
        ESP_LOGI(TAG, "ML307 board detected, setting opus encoder complexity to 5");
        opus_encoder_->SetComplexity(5);
    } else {
-        ESP_LOGI(TAG, "WiFi board detected, setting opus encoder complexity to 3");
+        ESP_LOGI(TAG, "WiFi board detected, setting opus encoder complexity to 0");
-        opus_encoder_->SetComplexity(3);
+        opus_encoder_->SetComplexity(0);
    }
    if (codec->input_sample_rate() != 16000) {
@@ -418,9 +418,8 @@ void Application::Start() {
        Alert(Lang::Strings::ERROR, message.c_str(), "sad", Lang::Sounds::P3_EXCLAMATION);
    });
    protocol_->OnIncomingAudio([this](AudioStreamPacket&& packet) {
        const int max_packets_in_queue = 600 / OPUS_FRAME_DURATION_MS;
        std::lock_guard<std::mutex> lock(mutex_);
-        if (audio_decode_queue_.size() < max_packets_in_queue) {
+        if (audio_decode_queue_.size() < MAX_AUDIO_PACKETS_IN_QUEUE) {
            audio_decode_queue_.emplace_back(std::move(packet));
        }
    });
@@ -544,10 +543,17 @@ void Application::Start() {
    audio_processor_->Initialize(codec);
    audio_processor_->OnOutput([this](std::vector<int16_t>&& data) {
-        background_task_->Schedule([this, data = std::move(data)]() mutable {
+        {
-            if (protocol_->IsAudioChannelBusy()) {
+            std::lock_guard<std::mutex> lock(mutex_);
            // We do not have a send queue yet, but all packets are sent by the main task
            // so we use the main task queue to limit the number of packets
            if (main_tasks_.size() > MAX_AUDIO_PACKETS_IN_QUEUE) {
                ESP_LOGW(TAG, "Too many main tasks = %u, skip sending audio...", main_tasks_.size());
                return;
            }
        }
        background_task_->Schedule([this, data = std::move(data)]() mutable {
            opus_encoder_->Encode(std::move(data), [this](std::vector<uint8_t>&& opus) {
                AudioStreamPacket packet;
                packet.payload = std::move(opus);
@@ -631,6 +637,9 @@ void Application::Start() {
        ResetDecoder();
        PlaySound(Lang::Sounds::P3_SUCCESS);
    }
    // Print heap stats
    SystemInfo::PrintHeapStats();
    // Enter the main event loop
    MainEventLoop();
@@ -644,14 +653,9 @@ void Application::OnClockTimer() {
    // Print the debug info every 10 seconds
    if (clock_ticks_ % 10 == 0) {
-        // char buffer[500];
+        // SystemInfo::PrintTaskCpuUsage(pdMS_TO_TICKS(1000));
-        // vTaskList(buffer);
+        // SystemInfo::PrintTaskList();
-        // ESP_LOGI(TAG, "Task list: \n%s", buffer);
+        SystemInfo::PrintHeapStats();
        // SystemInfo::PrintRealTimeStats(pdMS_TO_TICKS(1000));
        int free_sram = heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
        int min_free_sram = heap_caps_get_minimum_free_size(MALLOC_CAP_INTERNAL);
        ESP_LOGI(TAG, "Free internal: %u minimal internal: %u", free_sram, min_free_sram);
        // If we have synchronized server time, set the status to clock "HH:MM" if the device is idle
        if (ota_.HasServerTime()) {
@@ -884,7 +888,7 @@ void Application::SetDeviceState(DeviceState state) {
            if (!audio_processor_->IsRunning()) {
                // Send the start listening command
                protocol_->SendStartListening(listening_mode_);
-                if (listening_mode_ == kListeningModeAutoStop && previous_state == kDeviceStateSpeaking) {
+                if (previous_state == kDeviceStateSpeaking) {
                    // FIXME: Wait for the speaker to empty the buffer
                    vTaskDelay(pdMS_TO_TICKS(120));
                }
--- a/main/application.h
+++ b/main/application.h
@@ -27,9 +27,7 @@
 #endif
 #define SCHEDULE_EVENT (1 << 0)
-#define AUDIO_INPUT_READY_EVENT (1 << 1)
+#define CHECK_NEW_VERSION_DONE_EVENT (1 << 2)
 #define AUDIO_OUTPUT_READY_EVENT (1 << 2)
 #define CHECK_NEW_VERSION_DONE_EVENT (1 << 3)
 enum DeviceState {
    kDeviceStateUnknown,
@@ -45,6 +43,7 @@ enum DeviceState {
 };
 #define OPUS_FRAME_DURATION_MS 60
 #define MAX_AUDIO_PACKETS_IN_QUEUE (2400 / OPUS_FRAME_DURATION_MS)
 class Application {
 public:
--- a/main/audio_processing/wake_word_detect.cc
+++ b/main/audio_processing/wake_word_detect.cc
@@ -137,8 +137,8 @@ void WakeWordDetect::AudioDetectionTask() {
 void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) {
    // store audio data to wake_word_pcm_
    wake_word_pcm_.emplace_back(std::vector<int16_t>(data, data + samples));
-    // keep about 2 seconds of data, detect duration is 32ms (sample_rate == 16000, chunksize == 512)
+    // keep about 2 seconds of data, detect duration is 30ms (sample_rate == 16000, chunksize == 512)
-    while (wake_word_pcm_.size() > 2000 / 32) {
+    while (wake_word_pcm_.size() > 2000 / 30) {
        wake_word_pcm_.pop_front();
    }
 }
@@ -165,7 +165,7 @@ void WakeWordDetect::EncodeWakeWordData() {
            this_->wake_word_pcm_.clear();
            auto end_time = esp_timer_get_time();
-            ESP_LOGI(TAG, "Encode wake word opus %zu packets in %lld ms",
+            ESP_LOGI(TAG, "Encode wake word opus %u packets in %lld ms",
                this_->wake_word_opus_.size(), (end_time - start_time) / 1000);
            std::lock_guard<std::mutex> lock(this_->wake_word_mutex_);
--- a/main/idf_component.yml
+++ b/main/idf_component.yml
@@ -11,7 +11,7 @@ dependencies:
  78/esp_lcd_nv3023: ~1.0.0
  78/esp-wifi-connect: ~2.4.2
  78/esp-opus-encoder: ~2.3.2
-  78/esp-ml307: ~2.1.0
+  78/esp-ml307: ~2.1.2
  78/xiaozhi-fonts: ~1.3.2
  espressif/led_strip: ^2.5.5
  espressif/esp_codec_dev: ~1.3.2
--- a/main/ota.cc
+++ b/main/ota.cc
@@ -285,7 +285,7 @@ void Ota::Upgrade(const std::string& firmware_url) {
        total_read += ret;
        if (esp_timer_get_time() - last_calc_time >= 1000000 || ret == 0) {
            size_t progress = total_read * 100 / content_length;
-            ESP_LOGI(TAG, "Progress: %zu%% (%zu/%zu), Speed: %zuB/s", progress, total_read, content_length, recent_read);
+            ESP_LOGI(TAG, "Progress: %u%% (%u/%u), Speed: %uB/s", progress, total_read, content_length, recent_read);
            if (upgrade_callback_) {
                upgrade_callback_(progress, recent_read);
            }
--- a/main/protocols/mqtt_protocol.cc
+++ b/main/protocols/mqtt_protocol.cc
@@ -144,9 +144,7 @@ void MqttProtocol::SendAudio(const AudioStreamPacket& packet) {
        return;
    }
    busy_sending_audio_ = true;
    udp_->Send(encrypted);
    busy_sending_audio_ = false;
 }
 void MqttProtocol::CloseAudioChannel() {
@@ -177,7 +175,6 @@ bool MqttProtocol::OpenAudioChannel() {
        }
    }
    busy_sending_audio_ = false;
    error_occurred_ = false;
    session_id_ = "";
    xEventGroupClearBits(event_group_handle_, MQTT_PROTOCOL_SERVER_HELLO_EVENT);
@@ -207,7 +204,7 @@ bool MqttProtocol::OpenAudioChannel() {
         * |payload payload_len|
         */
        if (data.size() < sizeof(aes_nonce_)) {
-            ESP_LOGE(TAG, "Invalid audio packet size: %zu", data.size());
+            ESP_LOGE(TAG, "Invalid audio packet size: %u", data.size());
            return;
        }
        if (data[0] != 0x01) {
--- a/main/protocols/protocol.cc
+++ b/main/protocols/protocol.cc
@@ -130,8 +130,3 @@ bool Protocol::IsTimeout() const {
    }
    return timeout;
 }
 bool Protocol::IsAudioChannelBusy() const {
    return busy_sending_audio_;
 }
--- a/main/protocols/protocol.h
+++ b/main/protocols/protocol.h
@@ -63,7 +63,6 @@ public:
    virtual bool OpenAudioChannel() = 0;
    virtual void CloseAudioChannel() = 0;
    virtual bool IsAudioChannelOpened() const = 0;
    virtual bool IsAudioChannelBusy() const;
    virtual void SendAudio(const AudioStreamPacket& packet) = 0;
    virtual void SendWakeWordDetected(const std::string& wake_word);
    virtual void SendStartListening(ListeningMode mode);
@@ -83,7 +82,6 @@ protected:
    int server_sample_rate_ = 24000;
    int server_frame_duration_ = 60;
    bool error_occurred_ = false;
    bool busy_sending_audio_ = false;
    std::string session_id_;
    std::chrono::time_point<std::chrono::steady_clock> last_incoming_time_;
--- a/main/protocols/websocket_protocol.cc
+++ b/main/protocols/websocket_protocol.cc
@@ -44,9 +44,7 @@ void WebsocketProtocol::SendAudio(const AudioStreamPacket& packet) {
        bp2->payload_size = htonl(packet.payload.size());
        memcpy(bp2->payload, packet.payload.data(), packet.payload.size());
        busy_sending_audio_ = true;
        websocket_->Send(serialized.data(), serialized.size(), true);
        busy_sending_audio_ = false;
    } else if (version_ == 3) {
        std::string serialized;
        serialized.resize(sizeof(BinaryProtocol3) + packet.payload.size());
@@ -56,13 +54,9 @@ void WebsocketProtocol::SendAudio(const AudioStreamPacket& packet) {
        bp3->payload_size = htons(packet.payload.size());
        memcpy(bp3->payload, packet.payload.data(), packet.payload.size());
        busy_sending_audio_ = true;
        websocket_->Send(serialized.data(), serialized.size(), true);
        busy_sending_audio_ = false;
    } else {
        busy_sending_audio_ = true;
        websocket_->Send(packet.payload.data(), packet.payload.size(), true);
        busy_sending_audio_ = false;
    }
 }
@@ -104,7 +98,6 @@ bool WebsocketProtocol::OpenAudioChannel() {
        version_ = version;
    }
    busy_sending_audio_ = false;
    error_occurred_ = false;
    websocket_ = Board::GetInstance().CreateWebSocket();
--- a/main/system_info.cc
+++ b/main/system_info.cc
@@ -47,7 +47,7 @@ std::string SystemInfo::GetChipModelName() {
    return std::string(CONFIG_IDF_TARGET);
 }
-esp_err_t SystemInfo::PrintRealTimeStats(TickType_t xTicksToWait) {
+esp_err_t SystemInfo::PrintTaskCpuUsage(TickType_t xTicksToWait) {
    #define ARRAY_SIZE_OFFSET 5
    TaskStatus_t *start_array = NULL, *end_array = NULL;
    UBaseType_t start_array_size, end_array_size;
@@ -132,3 +132,14 @@ exit:    //Common return path
    return ret;
 }
 void SystemInfo::PrintTaskList() {
    char buffer[500];
    vTaskList(buffer);
    ESP_LOGI(TAG, "Task list: \n%s", buffer);
 }
 // Log two figures for the MALLOC_CAP_INTERNAL heap at INFO level: the value
 // from heap_caps_get_free_size() and the value from
 // heap_caps_get_minimum_free_size().
 void SystemInfo::PrintHeapStats() {
    const int internal_free_now = heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
    const int internal_free_low = heap_caps_get_minimum_free_size(MALLOC_CAP_INTERNAL);
    ESP_LOGI(TAG, "free sram: %u minimal sram: %u", internal_free_now, internal_free_low);
 }
--- a/main/system_info.h
+++ b/main/system_info.h
@@ -13,7 +13,9 @@ public:
    static size_t GetFreeHeapSize();
    static std::string GetMacAddress();
    static std::string GetChipModelName();
-    static esp_err_t PrintRealTimeStats(TickType_t xTicksToWait);
+    static esp_err_t PrintTaskCpuUsage(TickType_t xTicksToWait);
    static void PrintTaskList();
    static void PrintHeapStats();
 };
 #endif // _SYSTEM_INFO_H_
--- a/sdkconfig.defaults
+++ b/sdkconfig.defaults
@@ -24,6 +24,12 @@ CONFIG_ESP_WIFI_IRAM_OPT=n
 CONFIG_ESP_WIFI_RX_IRAM_OPT=n
 CONFIG_ESP_WIFI_DYNAMIC_RX_MGMT_BUFFER=y
 # These entries are copied from ESP-HI (ESP32C3) to reduce memory usage
 CONFIG_ESP_WIFI_STATIC_RX_BUFFER_NUM=6
 CONFIG_ESP_WIFI_DYNAMIC_RX_BUFFER_NUM=8
 CONFIG_NEWLIB_NANO_FORMAT=y
 CONFIG_ENTERPRISE_SUPPORT=n
 CONFIG_CODEC_I2C_BACKWARD_COMPATIBLE=n
 # Fix ML307 FIFO Overflow