forked from xiaozhi/xiaozhi-esp32
v1.6.5: Improve performance and memory usage
This commit is contained in:
@@ -4,7 +4,7 @@
|
|||||||
# CMakeLists in this exact order for cmake to work correctly
|
# CMakeLists in this exact order for cmake to work correctly
|
||||||
cmake_minimum_required(VERSION 3.16)
|
cmake_minimum_required(VERSION 3.16)
|
||||||
|
|
||||||
set(PROJECT_VER "1.6.4")
|
set(PROJECT_VER "1.6.5")
|
||||||
|
|
||||||
# Add this line to disable the specific warning
|
# Add this line to disable the specific warning
|
||||||
add_compile_options(-Wno-missing-field-initializers)
|
add_compile_options(-Wno-missing-field-initializers)
|
||||||
|
|||||||
@@ -139,7 +139,7 @@ void Application::CheckNewVersion() {
|
|||||||
|
|
||||||
ota_.StartUpgrade([display](int progress, size_t speed) {
|
ota_.StartUpgrade([display](int progress, size_t speed) {
|
||||||
char buffer[64];
|
char buffer[64];
|
||||||
snprintf(buffer, sizeof(buffer), "%d%% %zuKB/s", progress, speed / 1024);
|
snprintf(buffer, sizeof(buffer), "%d%% %uKB/s", progress, speed / 1024);
|
||||||
display->SetChatMessage("system", buffer);
|
display->SetChatMessage("system", buffer);
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -365,8 +365,8 @@ void Application::Start() {
|
|||||||
ESP_LOGI(TAG, "ML307 board detected, setting opus encoder complexity to 5");
|
ESP_LOGI(TAG, "ML307 board detected, setting opus encoder complexity to 5");
|
||||||
opus_encoder_->SetComplexity(5);
|
opus_encoder_->SetComplexity(5);
|
||||||
} else {
|
} else {
|
||||||
ESP_LOGI(TAG, "WiFi board detected, setting opus encoder complexity to 3");
|
ESP_LOGI(TAG, "WiFi board detected, setting opus encoder complexity to 0");
|
||||||
opus_encoder_->SetComplexity(3);
|
opus_encoder_->SetComplexity(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (codec->input_sample_rate() != 16000) {
|
if (codec->input_sample_rate() != 16000) {
|
||||||
@@ -418,9 +418,8 @@ void Application::Start() {
|
|||||||
Alert(Lang::Strings::ERROR, message.c_str(), "sad", Lang::Sounds::P3_EXCLAMATION);
|
Alert(Lang::Strings::ERROR, message.c_str(), "sad", Lang::Sounds::P3_EXCLAMATION);
|
||||||
});
|
});
|
||||||
protocol_->OnIncomingAudio([this](AudioStreamPacket&& packet) {
|
protocol_->OnIncomingAudio([this](AudioStreamPacket&& packet) {
|
||||||
const int max_packets_in_queue = 600 / OPUS_FRAME_DURATION_MS;
|
|
||||||
std::lock_guard<std::mutex> lock(mutex_);
|
std::lock_guard<std::mutex> lock(mutex_);
|
||||||
if (audio_decode_queue_.size() < max_packets_in_queue) {
|
if (audio_decode_queue_.size() < MAX_AUDIO_PACKETS_IN_QUEUE) {
|
||||||
audio_decode_queue_.emplace_back(std::move(packet));
|
audio_decode_queue_.emplace_back(std::move(packet));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -544,10 +543,17 @@ void Application::Start() {
|
|||||||
|
|
||||||
audio_processor_->Initialize(codec);
|
audio_processor_->Initialize(codec);
|
||||||
audio_processor_->OnOutput([this](std::vector<int16_t>&& data) {
|
audio_processor_->OnOutput([this](std::vector<int16_t>&& data) {
|
||||||
background_task_->Schedule([this, data = std::move(data)]() mutable {
|
{
|
||||||
if (protocol_->IsAudioChannelBusy()) {
|
std::lock_guard<std::mutex> lock(mutex_);
|
||||||
|
// We do not have a send queue yet, but all packets are sent by the main task
|
||||||
|
// so we use the main task queue to limit the number of packets
|
||||||
|
if (main_tasks_.size() > MAX_AUDIO_PACKETS_IN_QUEUE) {
|
||||||
|
ESP_LOGW(TAG, "Too many main tasks = %u, skip sending audio...", main_tasks_.size());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
background_task_->Schedule([this, data = std::move(data)]() mutable {
|
||||||
opus_encoder_->Encode(std::move(data), [this](std::vector<uint8_t>&& opus) {
|
opus_encoder_->Encode(std::move(data), [this](std::vector<uint8_t>&& opus) {
|
||||||
AudioStreamPacket packet;
|
AudioStreamPacket packet;
|
||||||
packet.payload = std::move(opus);
|
packet.payload = std::move(opus);
|
||||||
@@ -631,6 +637,9 @@ void Application::Start() {
|
|||||||
ResetDecoder();
|
ResetDecoder();
|
||||||
PlaySound(Lang::Sounds::P3_SUCCESS);
|
PlaySound(Lang::Sounds::P3_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Print heap stats
|
||||||
|
SystemInfo::PrintHeapStats();
|
||||||
|
|
||||||
// Enter the main event loop
|
// Enter the main event loop
|
||||||
MainEventLoop();
|
MainEventLoop();
|
||||||
@@ -644,14 +653,9 @@ void Application::OnClockTimer() {
|
|||||||
|
|
||||||
// Print the debug info every 10 seconds
|
// Print the debug info every 10 seconds
|
||||||
if (clock_ticks_ % 10 == 0) {
|
if (clock_ticks_ % 10 == 0) {
|
||||||
// char buffer[500];
|
// SystemInfo::PrintTaskCpuUsage(pdMS_TO_TICKS(1000));
|
||||||
// vTaskList(buffer);
|
// SystemInfo::PrintTaskList();
|
||||||
// ESP_LOGI(TAG, "Task list: \n%s", buffer);
|
SystemInfo::PrintHeapStats();
|
||||||
// SystemInfo::PrintRealTimeStats(pdMS_TO_TICKS(1000));
|
|
||||||
|
|
||||||
int free_sram = heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
|
|
||||||
int min_free_sram = heap_caps_get_minimum_free_size(MALLOC_CAP_INTERNAL);
|
|
||||||
ESP_LOGI(TAG, "Free internal: %u minimal internal: %u", free_sram, min_free_sram);
|
|
||||||
|
|
||||||
// If we have synchronized server time, set the status to clock "HH:MM" if the device is idle
|
// If we have synchronized server time, set the status to clock "HH:MM" if the device is idle
|
||||||
if (ota_.HasServerTime()) {
|
if (ota_.HasServerTime()) {
|
||||||
@@ -884,7 +888,7 @@ void Application::SetDeviceState(DeviceState state) {
|
|||||||
if (!audio_processor_->IsRunning()) {
|
if (!audio_processor_->IsRunning()) {
|
||||||
// Send the start listening command
|
// Send the start listening command
|
||||||
protocol_->SendStartListening(listening_mode_);
|
protocol_->SendStartListening(listening_mode_);
|
||||||
if (listening_mode_ == kListeningModeAutoStop && previous_state == kDeviceStateSpeaking) {
|
if (previous_state == kDeviceStateSpeaking) {
|
||||||
// FIXME: Wait for the speaker to empty the buffer
|
// FIXME: Wait for the speaker to empty the buffer
|
||||||
vTaskDelay(pdMS_TO_TICKS(120));
|
vTaskDelay(pdMS_TO_TICKS(120));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,9 +27,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define SCHEDULE_EVENT (1 << 0)
|
#define SCHEDULE_EVENT (1 << 0)
|
||||||
#define AUDIO_INPUT_READY_EVENT (1 << 1)
|
#define CHECK_NEW_VERSION_DONE_EVENT (1 << 2)
|
||||||
#define AUDIO_OUTPUT_READY_EVENT (1 << 2)
|
|
||||||
#define CHECK_NEW_VERSION_DONE_EVENT (1 << 3)
|
|
||||||
|
|
||||||
enum DeviceState {
|
enum DeviceState {
|
||||||
kDeviceStateUnknown,
|
kDeviceStateUnknown,
|
||||||
@@ -45,6 +43,7 @@ enum DeviceState {
|
|||||||
};
|
};
|
||||||
|
|
||||||
#define OPUS_FRAME_DURATION_MS 60
|
#define OPUS_FRAME_DURATION_MS 60
|
||||||
|
#define MAX_AUDIO_PACKETS_IN_QUEUE (2400 / OPUS_FRAME_DURATION_MS)
|
||||||
|
|
||||||
class Application {
|
class Application {
|
||||||
public:
|
public:
|
||||||
|
|||||||
@@ -137,8 +137,8 @@ void WakeWordDetect::AudioDetectionTask() {
|
|||||||
void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) {
|
void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) {
|
||||||
// store audio data to wake_word_pcm_
|
// store audio data to wake_word_pcm_
|
||||||
wake_word_pcm_.emplace_back(std::vector<int16_t>(data, data + samples));
|
wake_word_pcm_.emplace_back(std::vector<int16_t>(data, data + samples));
|
||||||
// keep about 2 seconds of data, detect duration is 32ms (sample_rate == 16000, chunksize == 512)
|
// keep about 2 seconds of data, detect duration is 30ms (sample_rate == 16000, chunksize == 512)
|
||||||
while (wake_word_pcm_.size() > 2000 / 32) {
|
while (wake_word_pcm_.size() > 2000 / 30) {
|
||||||
wake_word_pcm_.pop_front();
|
wake_word_pcm_.pop_front();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -165,7 +165,7 @@ void WakeWordDetect::EncodeWakeWordData() {
|
|||||||
this_->wake_word_pcm_.clear();
|
this_->wake_word_pcm_.clear();
|
||||||
|
|
||||||
auto end_time = esp_timer_get_time();
|
auto end_time = esp_timer_get_time();
|
||||||
ESP_LOGI(TAG, "Encode wake word opus %zu packets in %lld ms",
|
ESP_LOGI(TAG, "Encode wake word opus %u packets in %lld ms",
|
||||||
this_->wake_word_opus_.size(), (end_time - start_time) / 1000);
|
this_->wake_word_opus_.size(), (end_time - start_time) / 1000);
|
||||||
|
|
||||||
std::lock_guard<std::mutex> lock(this_->wake_word_mutex_);
|
std::lock_guard<std::mutex> lock(this_->wake_word_mutex_);
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ dependencies:
|
|||||||
78/esp_lcd_nv3023: ~1.0.0
|
78/esp_lcd_nv3023: ~1.0.0
|
||||||
78/esp-wifi-connect: ~2.4.2
|
78/esp-wifi-connect: ~2.4.2
|
||||||
78/esp-opus-encoder: ~2.3.2
|
78/esp-opus-encoder: ~2.3.2
|
||||||
78/esp-ml307: ~2.1.0
|
78/esp-ml307: ~2.1.2
|
||||||
78/xiaozhi-fonts: ~1.3.2
|
78/xiaozhi-fonts: ~1.3.2
|
||||||
espressif/led_strip: ^2.5.5
|
espressif/led_strip: ^2.5.5
|
||||||
espressif/esp_codec_dev: ~1.3.2
|
espressif/esp_codec_dev: ~1.3.2
|
||||||
|
|||||||
@@ -285,7 +285,7 @@ void Ota::Upgrade(const std::string& firmware_url) {
|
|||||||
total_read += ret;
|
total_read += ret;
|
||||||
if (esp_timer_get_time() - last_calc_time >= 1000000 || ret == 0) {
|
if (esp_timer_get_time() - last_calc_time >= 1000000 || ret == 0) {
|
||||||
size_t progress = total_read * 100 / content_length;
|
size_t progress = total_read * 100 / content_length;
|
||||||
ESP_LOGI(TAG, "Progress: %zu%% (%zu/%zu), Speed: %zuB/s", progress, total_read, content_length, recent_read);
|
ESP_LOGI(TAG, "Progress: %u%% (%u/%u), Speed: %uB/s", progress, total_read, content_length, recent_read);
|
||||||
if (upgrade_callback_) {
|
if (upgrade_callback_) {
|
||||||
upgrade_callback_(progress, recent_read);
|
upgrade_callback_(progress, recent_read);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -144,9 +144,7 @@ void MqttProtocol::SendAudio(const AudioStreamPacket& packet) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
busy_sending_audio_ = true;
|
|
||||||
udp_->Send(encrypted);
|
udp_->Send(encrypted);
|
||||||
busy_sending_audio_ = false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void MqttProtocol::CloseAudioChannel() {
|
void MqttProtocol::CloseAudioChannel() {
|
||||||
@@ -177,7 +175,6 @@ bool MqttProtocol::OpenAudioChannel() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
busy_sending_audio_ = false;
|
|
||||||
error_occurred_ = false;
|
error_occurred_ = false;
|
||||||
session_id_ = "";
|
session_id_ = "";
|
||||||
xEventGroupClearBits(event_group_handle_, MQTT_PROTOCOL_SERVER_HELLO_EVENT);
|
xEventGroupClearBits(event_group_handle_, MQTT_PROTOCOL_SERVER_HELLO_EVENT);
|
||||||
@@ -207,7 +204,7 @@ bool MqttProtocol::OpenAudioChannel() {
|
|||||||
* |payload payload_len|
|
* |payload payload_len|
|
||||||
*/
|
*/
|
||||||
if (data.size() < sizeof(aes_nonce_)) {
|
if (data.size() < sizeof(aes_nonce_)) {
|
||||||
ESP_LOGE(TAG, "Invalid audio packet size: %zu", data.size());
|
ESP_LOGE(TAG, "Invalid audio packet size: %u", data.size());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (data[0] != 0x01) {
|
if (data[0] != 0x01) {
|
||||||
|
|||||||
@@ -130,8 +130,3 @@ bool Protocol::IsTimeout() const {
|
|||||||
}
|
}
|
||||||
return timeout;
|
return timeout;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Protocol::IsAudioChannelBusy() const {
|
|
||||||
return busy_sending_audio_;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|||||||
@@ -63,7 +63,6 @@ public:
|
|||||||
virtual bool OpenAudioChannel() = 0;
|
virtual bool OpenAudioChannel() = 0;
|
||||||
virtual void CloseAudioChannel() = 0;
|
virtual void CloseAudioChannel() = 0;
|
||||||
virtual bool IsAudioChannelOpened() const = 0;
|
virtual bool IsAudioChannelOpened() const = 0;
|
||||||
virtual bool IsAudioChannelBusy() const;
|
|
||||||
virtual void SendAudio(const AudioStreamPacket& packet) = 0;
|
virtual void SendAudio(const AudioStreamPacket& packet) = 0;
|
||||||
virtual void SendWakeWordDetected(const std::string& wake_word);
|
virtual void SendWakeWordDetected(const std::string& wake_word);
|
||||||
virtual void SendStartListening(ListeningMode mode);
|
virtual void SendStartListening(ListeningMode mode);
|
||||||
@@ -83,7 +82,6 @@ protected:
|
|||||||
int server_sample_rate_ = 24000;
|
int server_sample_rate_ = 24000;
|
||||||
int server_frame_duration_ = 60;
|
int server_frame_duration_ = 60;
|
||||||
bool error_occurred_ = false;
|
bool error_occurred_ = false;
|
||||||
bool busy_sending_audio_ = false;
|
|
||||||
std::string session_id_;
|
std::string session_id_;
|
||||||
std::chrono::time_point<std::chrono::steady_clock> last_incoming_time_;
|
std::chrono::time_point<std::chrono::steady_clock> last_incoming_time_;
|
||||||
|
|
||||||
|
|||||||
@@ -44,9 +44,7 @@ void WebsocketProtocol::SendAudio(const AudioStreamPacket& packet) {
|
|||||||
bp2->payload_size = htonl(packet.payload.size());
|
bp2->payload_size = htonl(packet.payload.size());
|
||||||
memcpy(bp2->payload, packet.payload.data(), packet.payload.size());
|
memcpy(bp2->payload, packet.payload.data(), packet.payload.size());
|
||||||
|
|
||||||
busy_sending_audio_ = true;
|
|
||||||
websocket_->Send(serialized.data(), serialized.size(), true);
|
websocket_->Send(serialized.data(), serialized.size(), true);
|
||||||
busy_sending_audio_ = false;
|
|
||||||
} else if (version_ == 3) {
|
} else if (version_ == 3) {
|
||||||
std::string serialized;
|
std::string serialized;
|
||||||
serialized.resize(sizeof(BinaryProtocol3) + packet.payload.size());
|
serialized.resize(sizeof(BinaryProtocol3) + packet.payload.size());
|
||||||
@@ -56,13 +54,9 @@ void WebsocketProtocol::SendAudio(const AudioStreamPacket& packet) {
|
|||||||
bp3->payload_size = htons(packet.payload.size());
|
bp3->payload_size = htons(packet.payload.size());
|
||||||
memcpy(bp3->payload, packet.payload.data(), packet.payload.size());
|
memcpy(bp3->payload, packet.payload.data(), packet.payload.size());
|
||||||
|
|
||||||
busy_sending_audio_ = true;
|
|
||||||
websocket_->Send(serialized.data(), serialized.size(), true);
|
websocket_->Send(serialized.data(), serialized.size(), true);
|
||||||
busy_sending_audio_ = false;
|
|
||||||
} else {
|
} else {
|
||||||
busy_sending_audio_ = true;
|
|
||||||
websocket_->Send(packet.payload.data(), packet.payload.size(), true);
|
websocket_->Send(packet.payload.data(), packet.payload.size(), true);
|
||||||
busy_sending_audio_ = false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -104,7 +98,6 @@ bool WebsocketProtocol::OpenAudioChannel() {
|
|||||||
version_ = version;
|
version_ = version;
|
||||||
}
|
}
|
||||||
|
|
||||||
busy_sending_audio_ = false;
|
|
||||||
error_occurred_ = false;
|
error_occurred_ = false;
|
||||||
|
|
||||||
websocket_ = Board::GetInstance().CreateWebSocket();
|
websocket_ = Board::GetInstance().CreateWebSocket();
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ std::string SystemInfo::GetChipModelName() {
|
|||||||
return std::string(CONFIG_IDF_TARGET);
|
return std::string(CONFIG_IDF_TARGET);
|
||||||
}
|
}
|
||||||
|
|
||||||
esp_err_t SystemInfo::PrintRealTimeStats(TickType_t xTicksToWait) {
|
esp_err_t SystemInfo::PrintTaskCpuUsage(TickType_t xTicksToWait) {
|
||||||
#define ARRAY_SIZE_OFFSET 5
|
#define ARRAY_SIZE_OFFSET 5
|
||||||
TaskStatus_t *start_array = NULL, *end_array = NULL;
|
TaskStatus_t *start_array = NULL, *end_array = NULL;
|
||||||
UBaseType_t start_array_size, end_array_size;
|
UBaseType_t start_array_size, end_array_size;
|
||||||
@@ -132,3 +132,14 @@ exit: //Common return path
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SystemInfo::PrintTaskList() {
|
||||||
|
char buffer[500];
|
||||||
|
vTaskList(buffer);
|
||||||
|
ESP_LOGI(TAG, "Task list: \n%s", buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SystemInfo::PrintHeapStats() {
|
||||||
|
int free_sram = heap_caps_get_free_size(MALLOC_CAP_INTERNAL);
|
||||||
|
int min_free_sram = heap_caps_get_minimum_free_size(MALLOC_CAP_INTERNAL);
|
||||||
|
ESP_LOGI(TAG, "free sram: %u minimal sram: %u", free_sram, min_free_sram);
|
||||||
|
}
|
||||||
|
|||||||
@@ -13,7 +13,9 @@ public:
|
|||||||
static size_t GetFreeHeapSize();
|
static size_t GetFreeHeapSize();
|
||||||
static std::string GetMacAddress();
|
static std::string GetMacAddress();
|
||||||
static std::string GetChipModelName();
|
static std::string GetChipModelName();
|
||||||
static esp_err_t PrintRealTimeStats(TickType_t xTicksToWait);
|
static esp_err_t PrintTaskCpuUsage(TickType_t xTicksToWait);
|
||||||
|
static void PrintTaskList();
|
||||||
|
static void PrintHeapStats();
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // _SYSTEM_INFO_H_
|
#endif // _SYSTEM_INFO_H_
|
||||||
|
|||||||
@@ -24,6 +24,12 @@ CONFIG_ESP_WIFI_IRAM_OPT=n
|
|||||||
CONFIG_ESP_WIFI_RX_IRAM_OPT=n
|
CONFIG_ESP_WIFI_RX_IRAM_OPT=n
|
||||||
CONFIG_ESP_WIFI_DYNAMIC_RX_MGMT_BUFFER=y
|
CONFIG_ESP_WIFI_DYNAMIC_RX_MGMT_BUFFER=y
|
||||||
|
|
||||||
|
# These entries are copied from ESP-HI (ESP32C3) to reduce memory usage
|
||||||
|
CONFIG_ESP_WIFI_STATIC_RX_BUFFER_NUM=6
|
||||||
|
CONFIG_ESP_WIFI_DYNAMIC_RX_BUFFER_NUM=8
|
||||||
|
CONFIG_NEWLIB_NANO_FORMAT=y
|
||||||
|
CONFIG_ENTERPRISE_SUPPORT=n
|
||||||
|
|
||||||
CONFIG_CODEC_I2C_BACKWARD_COMPATIBLE=n
|
CONFIG_CODEC_I2C_BACKWARD_COMPATIBLE=n
|
||||||
|
|
||||||
# Fix ML307 FIFO Overflow
|
# Fix ML307 FIFO Overflow
|
||||||
|
|||||||
Reference in New Issue
Block a user