diff --git a/main/Application.cc b/main/Application.cc index 4d798c7d..c43ceebd 100644 --- a/main/Application.cc +++ b/main/Application.cc @@ -277,10 +277,14 @@ void Application::EncodeWakeWordData() { } wake_word_encode_task_ = xTaskCreateStatic([](void* arg) { Application* app = (Application*)arg; + auto start_time = esp_timer_get_time(); // encode detect packets - for (auto& pcm : app->wake_word_pcm_) { - app->opus_encoder_.Encode(pcm, [app](const iovec opus) { - // append the opus data to the packet + OpusEncoder* encoder = new OpusEncoder(); + encoder->Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, 1, 60); + encoder->SetComplexity(2); + + for (auto& pcm: app->wake_word_pcm_) { + encoder->Encode(pcm, [app](const iovec opus) { iovec iov = { .iov_base = heap_caps_malloc(opus.iov_len, MALLOC_CAP_SPIRAM), .iov_len = opus.iov_len @@ -288,10 +292,14 @@ void Application::EncodeWakeWordData() { memcpy(iov.iov_base, opus.iov_base, opus.iov_len); app->wake_word_opus_.push_back(iov); }); - free(pcm.iov_base); + heap_caps_free(pcm.iov_base); } app->wake_word_pcm_.clear(); + + auto end_time = esp_timer_get_time(); + ESP_LOGI(TAG, "Encode wake word data opus packets: %d in %lld ms", app->wake_word_opus_.size(), (end_time - start_time) / 1000); xEventGroupSetBits(app->event_group_, DETECT_PACKETS_ENCODED); + delete encoder; vTaskDelete(NULL); }, "encode_detect_packets", 4096 * 8, this, 1, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_); } @@ -333,7 +341,7 @@ void Application::AudioDetectionTask() { StartWebSocketClient(); // Here the websocket is done, and we also wait for the wake word data to be encoded - xEventGroupWaitBits(event_group_, DETECT_PACKETS_ENCODED, pdFALSE, pdTRUE, portMAX_DELAY); + xEventGroupWaitBits(event_group_, DETECT_PACKETS_ENCODED, pdTRUE, pdTRUE, portMAX_DELAY); std::lock_guard lock(mutex_); if (ws_client_ && ws_client_->IsConnected()) { @@ -341,6 +349,7 @@ void Application::AudioDetectionTask() { SendWakeWordData(); // Send a ready message to indicate the server that the wake word data is sent SetChatState(kChatStateWakeWordDetected); + opus_encoder_.ResetState(); // If connected, the hello message is already sent, so we can start communication xEventGroupSetBits(event_group_, COMMUNICATION_RUNNING); @@ -405,11 +414,6 @@ void Application::AudioEncodeTask() { iovec pcm; xQueueReceive(audio_encode_queue_, &pcm, portMAX_DELAY); - if (pcm.iov_len == 0) { - ESP_LOGE(TAG, "Empty audio data"); - continue; - } - // Encode audio data opus_encoder_.Encode(pcm, [this](const iovec opus) { std::lock_guard lock(mutex_); diff --git a/main/OpusEncoder.cc b/main/OpusEncoder.cc index af65d432..6970c38d 100644 --- a/main/OpusEncoder.cc +++ b/main/OpusEncoder.cc @@ -28,13 +28,18 @@ void OpusEncoder::Configure(int sample_rate, int channels, int duration_ms) { // Set DTX opus_encoder_ctl(audio_enc_, OPUS_SET_DTX(1)); - // Set complexity to 5 - opus_encoder_ctl(audio_enc_, OPUS_SET_COMPLEXITY(5)); + SetComplexity(5); frame_size_ = sample_rate / 1000 * duration_ms; out_buffer_.resize(sample_rate * channels * sizeof(int16_t)); } +void OpusEncoder::SetComplexity(int complexity) { + if (audio_enc_ != nullptr) { + opus_encoder_ctl(audio_enc_, OPUS_SET_COMPLEXITY(complexity)); + } +} + void OpusEncoder::Encode(const iovec pcm, std::function handler) { if (audio_enc_ == nullptr) { ESP_LOGE(TAG, "Audio encoder is not configured"); @@ -58,3 +63,10 @@ void OpusEncoder::Encode(const iovec pcm, std::function in_buffer_.erase(in_buffer_.begin(), in_buffer_.begin() + frame_size_); } } + +void OpusEncoder::ResetState() { + if (audio_enc_ != nullptr) { + opus_encoder_ctl(audio_enc_, OPUS_RESET_STATE); + } + in_buffer_.clear(); +} diff --git a/main/OpusEncoder.h b/main/OpusEncoder.h index 34cdd181..ae8e1998 100644 --- a/main/OpusEncoder.h +++ b/main/OpusEncoder.h @@ -16,8 +16,10 @@ public: ~OpusEncoder(); void Configure(int sample_rate, int channels, int duration_ms = 60); + void SetComplexity(int complexity); void Encode(const iovec pcm, std::function handler); bool IsBufferEmpty() const { return in_buffer_.empty(); } + void ResetState(); private: struct OpusEncoder* audio_enc_ = nullptr;