forked from xiaozhi/xiaozhi-esp32
Add server AEC option
This commit is contained in:
@@ -248,11 +248,18 @@ config USE_AUDIO_PROCESSOR
|
|||||||
help
|
help
|
||||||
需要 ESP32 S3 与 AFE 支持
|
需要 ESP32 S3 与 AFE 支持
|
||||||
|
|
||||||
config USE_REALTIME_CHAT
|
config USE_DEVICE_AEC
|
||||||
bool "启用可语音打断的实时对话模式(需要 AEC 支持)"
|
bool "在通话过程中启用设备端 AEC"
|
||||||
default n
|
default n
|
||||||
depends on USE_AUDIO_PROCESSOR && (BOARD_TYPE_ESP_BOX_3 || BOARD_TYPE_ESP_BOX || BOARD_TYPE_ESP_BOX_LITE || BOARD_TYPE_LICHUANG_DEV || BOARD_TYPE_ESP32S3_KORVO2_V3)
|
depends on USE_AUDIO_PROCESSOR && (BOARD_TYPE_ESP_BOX_3 || BOARD_TYPE_ESP_BOX || BOARD_TYPE_ESP_BOX_LITE || BOARD_TYPE_LICHUANG_DEV || BOARD_TYPE_ESP32S3_KORVO2_V3)
|
||||||
help
|
help
|
||||||
需要 ESP32 S3 与 AEC 开启,因为性能不够,不建议和微信聊天界面风格同时开启
|
因为性能不够,不建议和微信聊天界面风格同时开启
|
||||||
|
|
||||||
|
config USE_SERVER_AEC
|
||||||
|
bool "在通话过程中启用服务器端 AEC"
|
||||||
|
default n
|
||||||
|
depends on USE_AUDIO_PROCESSOR
|
||||||
|
help
|
||||||
|
启用服务器端 AEC,需要服务器支持
|
||||||
|
|
||||||
endmenu
|
endmenu
|
||||||
|
|||||||
@@ -375,11 +375,19 @@ void Application::Start() {
|
|||||||
}
|
}
|
||||||
codec->Start();
|
codec->Start();
|
||||||
|
|
||||||
|
#if CONFIG_USE_AUDIO_PROCESSOR
|
||||||
xTaskCreatePinnedToCore([](void* arg) {
|
xTaskCreatePinnedToCore([](void* arg) {
|
||||||
Application* app = (Application*)arg;
|
Application* app = (Application*)arg;
|
||||||
app->AudioLoop();
|
app->AudioLoop();
|
||||||
vTaskDelete(NULL);
|
vTaskDelete(NULL);
|
||||||
}, "audio_loop", 4096 * 2, this, 8, &audio_loop_task_handle_, realtime_chat_enabled_ ? 1 : 0);
|
}, "audio_loop", 4096 * 2, this, 8, &audio_loop_task_handle_, 1);
|
||||||
|
#else
|
||||||
|
xTaskCreate([](void* arg) {
|
||||||
|
Application* app = (Application*)arg;
|
||||||
|
app->AudioLoop();
|
||||||
|
vTaskDelete(NULL);
|
||||||
|
}, "audio_loop", 4096 * 2, this, 8, &audio_loop_task_handle_);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Wait for the network to be ready */
|
/* Wait for the network to be ready */
|
||||||
board.StartNetwork();
|
board.StartNetwork();
|
||||||
@@ -514,7 +522,7 @@ void Application::Start() {
|
|||||||
});
|
});
|
||||||
bool protocol_started = protocol_->Start();
|
bool protocol_started = protocol_->Start();
|
||||||
|
|
||||||
audio_processor_->Initialize(codec, realtime_chat_enabled_);
|
audio_processor_->Initialize(codec);
|
||||||
audio_processor_->OnOutput([this](std::vector<int16_t>&& data) {
|
audio_processor_->OnOutput([this](std::vector<int16_t>&& data) {
|
||||||
background_task_->Schedule([this, data = std::move(data)]() mutable {
|
background_task_->Schedule([this, data = std::move(data)]() mutable {
|
||||||
if (protocol_->IsAudioChannelBusy()) {
|
if (protocol_->IsAudioChannelBusy()) {
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ private:
|
|||||||
esp_timer_handle_t clock_timer_handle_ = nullptr;
|
esp_timer_handle_t clock_timer_handle_ = nullptr;
|
||||||
volatile DeviceState device_state_ = kDeviceStateUnknown;
|
volatile DeviceState device_state_ = kDeviceStateUnknown;
|
||||||
ListeningMode listening_mode_ = kListeningModeAutoStop;
|
ListeningMode listening_mode_ = kListeningModeAutoStop;
|
||||||
#if CONFIG_USE_REALTIME_CHAT
|
#if CONFIG_USE_DEVICE_AEC || CONFIG_USE_SERVER_AEC
|
||||||
bool realtime_chat_enabled_ = true;
|
bool realtime_chat_enabled_ = true;
|
||||||
#else
|
#else
|
||||||
bool realtime_chat_enabled_ = false;
|
bool realtime_chat_enabled_ = false;
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ AfeAudioProcessor::AfeAudioProcessor()
|
|||||||
event_group_ = xEventGroupCreate();
|
event_group_ = xEventGroupCreate();
|
||||||
}
|
}
|
||||||
|
|
||||||
void AfeAudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) {
|
void AfeAudioProcessor::Initialize(AudioCodec* codec) {
|
||||||
codec_ = codec;
|
codec_ = codec;
|
||||||
int ref_num = codec_->input_reference() ? 1 : 0;
|
int ref_num = codec_->input_reference() ? 1 : 0;
|
||||||
|
|
||||||
@@ -26,22 +26,22 @@ void AfeAudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) {
|
|||||||
char* ns_model_name = esp_srmodel_filter(models, ESP_NSNET_PREFIX, NULL);
|
char* ns_model_name = esp_srmodel_filter(models, ESP_NSNET_PREFIX, NULL);
|
||||||
|
|
||||||
afe_config_t* afe_config = afe_config_init(input_format.c_str(), NULL, AFE_TYPE_VC, AFE_MODE_HIGH_PERF);
|
afe_config_t* afe_config = afe_config_init(input_format.c_str(), NULL, AFE_TYPE_VC, AFE_MODE_HIGH_PERF);
|
||||||
if (realtime_chat) {
|
#ifdef CONFIG_USE_DEVICE_AEC
|
||||||
afe_config->aec_init = true;
|
afe_config->aec_init = true;
|
||||||
afe_config->aec_mode = AEC_MODE_VOIP_HIGH_PERF;
|
afe_config->aec_mode = AEC_MODE_VOIP_HIGH_PERF;
|
||||||
} else {
|
#else
|
||||||
afe_config->aec_init = false;
|
afe_config->aec_init = false;
|
||||||
}
|
#endif
|
||||||
afe_config->ns_init = true;
|
afe_config->ns_init = true;
|
||||||
afe_config->ns_model_name = ns_model_name;
|
afe_config->ns_model_name = ns_model_name;
|
||||||
afe_config->afe_ns_mode = AFE_NS_MODE_NET;
|
afe_config->afe_ns_mode = AFE_NS_MODE_NET;
|
||||||
if (realtime_chat) {
|
#ifdef CONFIG_USE_DEVICE_AEC
|
||||||
afe_config->vad_init = false;
|
afe_config->vad_init = false;
|
||||||
} else {
|
#else
|
||||||
afe_config->vad_init = true;
|
afe_config->vad_init = true;
|
||||||
afe_config->vad_mode = VAD_MODE_0;
|
afe_config->vad_mode = VAD_MODE_0;
|
||||||
afe_config->vad_min_noise_ms = 100;
|
afe_config->vad_min_noise_ms = 100;
|
||||||
}
|
#endif
|
||||||
afe_config->afe_perferred_core = 1;
|
afe_config->afe_perferred_core = 1;
|
||||||
afe_config->afe_perferred_priority = 1;
|
afe_config->afe_perferred_priority = 1;
|
||||||
afe_config->agc_init = false;
|
afe_config->agc_init = false;
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ public:
|
|||||||
AfeAudioProcessor();
|
AfeAudioProcessor();
|
||||||
~AfeAudioProcessor();
|
~AfeAudioProcessor();
|
||||||
|
|
||||||
void Initialize(AudioCodec* codec, bool realtime_chat) override;
|
void Initialize(AudioCodec* codec) override;
|
||||||
void Feed(const std::vector<int16_t>& data) override;
|
void Feed(const std::vector<int16_t>& data) override;
|
||||||
void Start() override;
|
void Start() override;
|
||||||
void Stop() override;
|
void Stop() override;
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ class AudioProcessor {
|
|||||||
public:
|
public:
|
||||||
virtual ~AudioProcessor() = default;
|
virtual ~AudioProcessor() = default;
|
||||||
|
|
||||||
virtual void Initialize(AudioCodec* codec, bool realtime_chat) = 0;
|
virtual void Initialize(AudioCodec* codec) = 0;
|
||||||
virtual void Feed(const std::vector<int16_t>& data) = 0;
|
virtual void Feed(const std::vector<int16_t>& data) = 0;
|
||||||
virtual void Start() = 0;
|
virtual void Start() = 0;
|
||||||
virtual void Stop() = 0;
|
virtual void Stop() = 0;
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
static const char* TAG = "DummyAudioProcessor";
|
static const char* TAG = "DummyAudioProcessor";
|
||||||
|
|
||||||
void DummyAudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) {
|
void DummyAudioProcessor::Initialize(AudioCodec* codec) {
|
||||||
codec_ = codec;
|
codec_ = codec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
{
|
{
|
||||||
"name": "esp-box-3",
|
"name": "esp-box-3",
|
||||||
"sdkconfig_append": [
|
"sdkconfig_append": [
|
||||||
"CONFIG_USE_REALTIME_CHAT=y"
|
"CONFIG_USE_DEVICE_AEC=y"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
{
|
{
|
||||||
"name": "lichuang-dev",
|
"name": "lichuang-dev",
|
||||||
"sdkconfig_append": [
|
"sdkconfig_append": [
|
||||||
"CONFIG_USE_REALTIME_CHAT=y"
|
"CONFIG_USE_DEVICE_AEC=y"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -187,6 +187,9 @@ bool MqttProtocol::OpenAudioChannel() {
|
|||||||
message += "\"type\":\"hello\",";
|
message += "\"type\":\"hello\",";
|
||||||
message += "\"version\": 3,";
|
message += "\"version\": 3,";
|
||||||
message += "\"transport\":\"udp\",";
|
message += "\"transport\":\"udp\",";
|
||||||
|
#if CONFIG_USE_SERVER_AEC
|
||||||
|
message += "\"features\":{\"aec\":true},";
|
||||||
|
#endif
|
||||||
message += "\"audio_params\":{";
|
message += "\"audio_params\":{";
|
||||||
message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":" + std::to_string(OPUS_FRAME_DURATION_MS);
|
message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":" + std::to_string(OPUS_FRAME_DURATION_MS);
|
||||||
message += "}}";
|
message += "}}";
|
||||||
|
|||||||
@@ -189,6 +189,9 @@ bool WebsocketProtocol::OpenAudioChannel() {
|
|||||||
std::string message = "{";
|
std::string message = "{";
|
||||||
message += "\"type\":\"hello\",";
|
message += "\"type\":\"hello\",";
|
||||||
message += "\"version\": " + std::to_string(version_) + ",";
|
message += "\"version\": " + std::to_string(version_) + ",";
|
||||||
|
#if CONFIG_USE_SERVER_AEC
|
||||||
|
message += "\"features\":{\"aec\":true},";
|
||||||
|
#endif
|
||||||
message += "\"transport\":\"websocket\",";
|
message += "\"transport\":\"websocket\",";
|
||||||
message += "\"audio_params\":{";
|
message += "\"audio_params\":{";
|
||||||
message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":" + std::to_string(OPUS_FRAME_DURATION_MS);
|
message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":" + std::to_string(OPUS_FRAME_DURATION_MS);
|
||||||
|
|||||||
Reference in New Issue
Block a user