From be18c1aa04669f4551ec9ec7ea2bd859552824c8 Mon Sep 17 00:00:00 2001 From: Xiaoxia Date: Fri, 9 May 2025 14:00:26 +0800 Subject: [PATCH] Add server AEC option --- main/Kconfig.projbuild | 15 +++++++--- main/application.cc | 12 ++++++-- main/application.h | 2 +- main/audio_processing/afe_audio_processor.cc | 28 +++++++++---------- main/audio_processing/afe_audio_processor.h | 2 +- main/audio_processing/audio_processor.h | 2 +- .../audio_processing/dummy_audio_processor.cc | 2 +- main/boards/esp-box-3/config.json | 2 +- main/boards/lichuang-dev/config.json | 2 +- main/protocols/mqtt_protocol.cc | 3 ++ main/protocols/websocket_protocol.cc | 3 ++ 11 files changed, 47 insertions(+), 26 deletions(-) diff --git a/main/Kconfig.projbuild b/main/Kconfig.projbuild index 96567005..68a50906 100644 --- a/main/Kconfig.projbuild +++ b/main/Kconfig.projbuild @@ -248,11 +248,18 @@ config USE_AUDIO_PROCESSOR help 需要 ESP32 S3 与 AFE 支持 -config USE_REALTIME_CHAT - bool "启用可语音打断的实时对话模式(需要 AEC 支持)" +config USE_DEVICE_AEC + bool "在通话过程中启用设备端 AEC" default n depends on USE_AUDIO_PROCESSOR && (BOARD_TYPE_ESP_BOX_3 || BOARD_TYPE_ESP_BOX || BOARD_TYPE_ESP_BOX_LITE || BOARD_TYPE_LICHUANG_DEV || BOARD_TYPE_ESP32S3_KORVO2_V3) help - 需要 ESP32 S3 与 AEC 开启,因为性能不够,不建议和微信聊天界面风格同时开启 - + 因为性能不够,不建议和微信聊天界面风格同时开启 + +config USE_SERVER_AEC + bool "在通话过程中启用服务器端 AEC" + default n + depends on USE_AUDIO_PROCESSOR + help + 启用服务器端 AEC,需要服务器支持 + endmenu diff --git a/main/application.cc b/main/application.cc index bc16b71e..dc212fd7 100644 --- a/main/application.cc +++ b/main/application.cc @@ -375,11 +375,19 @@ void Application::Start() { } codec->Start(); +#if CONFIG_USE_AUDIO_PROCESSOR xTaskCreatePinnedToCore([](void* arg) { Application* app = (Application*)arg; app->AudioLoop(); vTaskDelete(NULL); - }, "audio_loop", 4096 * 2, this, 8, &audio_loop_task_handle_, realtime_chat_enabled_ ? 1 : 0); + }, "audio_loop", 4096 * 2, this, 8, &audio_loop_task_handle_, 1); +#else + xTaskCreate([](void* arg) { + Application* app = (Application*)arg; + app->AudioLoop(); + vTaskDelete(NULL); + }, "audio_loop", 4096 * 2, this, 8, &audio_loop_task_handle_); +#endif /* Wait for the network to be ready */ board.StartNetwork(); @@ -514,7 +522,7 @@ void Application::Start() { }); bool protocol_started = protocol_->Start(); - audio_processor_->Initialize(codec, realtime_chat_enabled_); + audio_processor_->Initialize(codec); audio_processor_->OnOutput([this](std::vector&& data) { background_task_->Schedule([this, data = std::move(data)]() mutable { if (protocol_->IsAudioChannelBusy()) { diff --git a/main/application.h b/main/application.h index 4291ed77..12cd7f73 100644 --- a/main/application.h +++ b/main/application.h @@ -89,7 +89,7 @@ private: esp_timer_handle_t clock_timer_handle_ = nullptr; volatile DeviceState device_state_ = kDeviceStateUnknown; ListeningMode listening_mode_ = kListeningModeAutoStop; -#if CONFIG_USE_REALTIME_CHAT +#if CONFIG_USE_DEVICE_AEC || CONFIG_USE_SERVER_AEC bool realtime_chat_enabled_ = true; #else bool realtime_chat_enabled_ = false; diff --git a/main/audio_processing/afe_audio_processor.cc b/main/audio_processing/afe_audio_processor.cc index 30898cab..fa931d80 100644 --- a/main/audio_processing/afe_audio_processor.cc +++ b/main/audio_processing/afe_audio_processor.cc @@ -10,7 +10,7 @@ AfeAudioProcessor::AfeAudioProcessor() event_group_ = xEventGroupCreate(); } -void AfeAudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) { +void AfeAudioProcessor::Initialize(AudioCodec* codec) { codec_ = codec; int ref_num = codec_->input_reference() ? 1 : 0; @@ -26,22 +26,22 @@ void AfeAudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) { char* ns_model_name = esp_srmodel_filter(models, ESP_NSNET_PREFIX, NULL); afe_config_t* afe_config = afe_config_init(input_format.c_str(), NULL, AFE_TYPE_VC, AFE_MODE_HIGH_PERF); - if (realtime_chat) { - afe_config->aec_init = true; - afe_config->aec_mode = AEC_MODE_VOIP_HIGH_PERF; - } else { - afe_config->aec_init = false; - } +#ifdef CONFIG_USE_DEVICE_AEC + afe_config->aec_init = true; + afe_config->aec_mode = AEC_MODE_VOIP_HIGH_PERF; +#else + afe_config->aec_init = false; +#endif afe_config->ns_init = true; afe_config->ns_model_name = ns_model_name; afe_config->afe_ns_mode = AFE_NS_MODE_NET; - if (realtime_chat) { - afe_config->vad_init = false; - } else { - afe_config->vad_init = true; - afe_config->vad_mode = VAD_MODE_0; - afe_config->vad_min_noise_ms = 100; - } +#ifdef CONFIG_USE_DEVICE_AEC + afe_config->vad_init = false; +#else + afe_config->vad_init = true; + afe_config->vad_mode = VAD_MODE_0; + afe_config->vad_min_noise_ms = 100; +#endif afe_config->afe_perferred_core = 1; afe_config->afe_perferred_priority = 1; afe_config->agc_init = false; diff --git a/main/audio_processing/afe_audio_processor.h b/main/audio_processing/afe_audio_processor.h index a7aa1d2c..9a9cfdaf 100644 --- a/main/audio_processing/afe_audio_processor.h +++ b/main/audio_processing/afe_audio_processor.h @@ -18,7 +18,7 @@ public: AfeAudioProcessor(); ~AfeAudioProcessor(); - void Initialize(AudioCodec* codec, bool realtime_chat) override; + void Initialize(AudioCodec* codec) override; void Feed(const std::vector& data) override; void Start() override; void Stop() override; diff --git a/main/audio_processing/audio_processor.h b/main/audio_processing/audio_processor.h index 048c8f2c..5acff47f 100644 --- a/main/audio_processing/audio_processor.h +++ b/main/audio_processing/audio_processor.h @@ -11,7 +11,7 @@ class AudioProcessor { public: virtual ~AudioProcessor() = default; - virtual void Initialize(AudioCodec* codec, bool realtime_chat) = 0; + virtual void Initialize(AudioCodec* codec) = 0; virtual void Feed(const std::vector& data) = 0; virtual void Start() = 0; virtual void Stop() = 0; diff --git a/main/audio_processing/dummy_audio_processor.cc b/main/audio_processing/dummy_audio_processor.cc index 01b7ac7d..622a7022 100644 --- a/main/audio_processing/dummy_audio_processor.cc +++ b/main/audio_processing/dummy_audio_processor.cc @@ -3,7 +3,7 @@ static const char* TAG = "DummyAudioProcessor"; -void DummyAudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) { +void DummyAudioProcessor::Initialize(AudioCodec* codec) { codec_ = codec; } diff --git a/main/boards/esp-box-3/config.json b/main/boards/esp-box-3/config.json index bce4d549..67ead9b9 100644 --- a/main/boards/esp-box-3/config.json +++ b/main/boards/esp-box-3/config.json @@ -4,7 +4,7 @@ { "name": "esp-box-3", "sdkconfig_append": [ - "CONFIG_USE_REALTIME_CHAT=y" + "CONFIG_USE_DEVICE_AEC=y" ] } ] diff --git a/main/boards/lichuang-dev/config.json b/main/boards/lichuang-dev/config.json index e3b475da..e2a7090e 100644 --- a/main/boards/lichuang-dev/config.json +++ b/main/boards/lichuang-dev/config.json @@ -4,7 +4,7 @@ { "name": "lichuang-dev", "sdkconfig_append": [ - "CONFIG_USE_REALTIME_CHAT=y" + "CONFIG_USE_DEVICE_AEC=y" ] } ] diff --git a/main/protocols/mqtt_protocol.cc b/main/protocols/mqtt_protocol.cc index 92b39287..fa786e76 100644 --- a/main/protocols/mqtt_protocol.cc +++ b/main/protocols/mqtt_protocol.cc @@ -187,6 +187,9 @@ bool MqttProtocol::OpenAudioChannel() { message += "\"type\":\"hello\","; message += "\"version\": 3,"; message += "\"transport\":\"udp\","; +#if CONFIG_USE_SERVER_AEC + message += "\"features\":{\"aec\":true},"; +#endif message += "\"audio_params\":{"; message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":" + std::to_string(OPUS_FRAME_DURATION_MS); message += "}}"; diff --git a/main/protocols/websocket_protocol.cc b/main/protocols/websocket_protocol.cc index 41bfbaa9..57e7af2f 100644 --- a/main/protocols/websocket_protocol.cc +++ b/main/protocols/websocket_protocol.cc @@ -189,6 +189,9 @@ bool WebsocketProtocol::OpenAudioChannel() { std::string message = "{"; message += "\"type\":\"hello\","; message += "\"version\": " + std::to_string(version_) + ","; +#if CONFIG_USE_SERVER_AEC + message += "\"features\":{\"aec\":true},"; +#endif message += "\"transport\":\"websocket\","; message += "\"audio_params\":{"; message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":" + std::to_string(OPUS_FRAME_DURATION_MS);