diff --git a/main/CMakeLists.txt b/main/CMakeLists.txt index 621bf6c7..d39e3be6 100644 --- a/main/CMakeLists.txt +++ b/main/CMakeLists.txt @@ -157,7 +157,9 @@ file(GLOB BOARD_SOURCES list(APPEND SOURCES ${BOARD_SOURCES}) if(CONFIG_USE_AUDIO_PROCESSOR) - list(APPEND SOURCES "audio_processing/audio_processor.cc") + list(APPEND SOURCES "audio_processing/afe_audio_processor.cc") +else() + list(APPEND SOURCES "audio_processing/dummy_audio_processor.cc") endif() if(CONFIG_USE_WAKE_WORD_DETECT) list(APPEND SOURCES "audio_processing/wake_word_detect.cc") diff --git a/main/application.cc b/main/application.cc index 102564a0..bc16b71e 100644 --- a/main/application.cc +++ b/main/application.cc @@ -10,6 +10,12 @@ #include "iot/thing_manager.h" #include "assets/lang_config.h" +#if CONFIG_USE_AUDIO_PROCESSOR +#include "afe_audio_processor.h" +#else +#include "dummy_audio_processor.h" +#endif + #include #include #include @@ -37,6 +43,12 @@ Application::Application() { event_group_ = xEventGroupCreate(); background_task_ = new BackgroundTask(4096 * 8); +#if CONFIG_USE_AUDIO_PROCESSOR + audio_processor_ = std::make_unique(); +#else + audio_processor_ = std::make_unique(); +#endif + esp_timer_create_args_t clock_timer_args = { .callback = [](void* arg) { Application* app = (Application*)arg; @@ -502,9 +514,8 @@ void Application::Start() { }); bool protocol_started = protocol_->Start(); -#if CONFIG_USE_AUDIO_PROCESSOR - audio_processor_.Initialize(codec, realtime_chat_enabled_); - audio_processor_.OnOutput([this](std::vector&& data) { + audio_processor_->Initialize(codec, realtime_chat_enabled_); + audio_processor_->OnOutput([this](std::vector&& data) { background_task_->Schedule([this, data = std::move(data)]() mutable { if (protocol_->IsAudioChannelBusy()) { return; @@ -520,7 +531,7 @@ void Application::Start() { }); }); }); - audio_processor_.OnVadStateChange([this](bool speaking) { + audio_processor_->OnVadStateChange([this](bool speaking) { if (device_state_ == kDeviceStateListening) { Schedule([this, speaking]() { if (speaking) { @@ -533,7 +544,6 @@ void Application::Start() { }); } }); -#endif #if CONFIG_USE_WAKE_WORD_DETECT wake_word_detect_.Initialize(codec); @@ -716,37 +726,16 @@ void Application::OnAudioInput() { } } #endif -#if CONFIG_USE_AUDIO_PROCESSOR - if (audio_processor_.IsRunning()) { + if (audio_processor_->IsRunning()) { std::vector data; - int samples = audio_processor_.GetFeedSize(); + int samples = audio_processor_->GetFeedSize(); if (samples > 0) { ReadAudio(data, 16000, samples); - audio_processor_.Feed(data); + audio_processor_->Feed(data); return; } } -#else - if (device_state_ == kDeviceStateListening) { - std::vector data; - ReadAudio(data, 16000, 30 * 16000 / 1000); - background_task_->Schedule([this, data = std::move(data)]() mutable { - if (protocol_->IsAudioChannelBusy()) { - return; - } - opus_encoder_->Encode(std::move(data), [this](std::vector&& opus) { - AudioStreamPacket packet; - packet.payload = std::move(opus); - packet.timestamp = last_output_timestamp_; - last_output_timestamp_ = 0; - Schedule([this, packet = std::move(packet)]() { - protocol_->SendAudio(packet); - }); - }); - }); - return; - } -#endif + vTaskDelay(pdMS_TO_TICKS(30)); } @@ -818,9 +807,7 @@ void Application::SetDeviceState(DeviceState state) { case kDeviceStateIdle: display->SetStatus(Lang::Strings::STANDBY); display->SetEmotion("neutral"); -#if CONFIG_USE_AUDIO_PROCESSOR - audio_processor_.Stop(); -#endif + audio_processor_->Stop(); #if CONFIG_USE_WAKE_WORD_DETECT wake_word_detect_.StartDetection(); #endif @@ -838,11 +825,7 @@ void Application::SetDeviceState(DeviceState state) { UpdateIotStates(); // Make sure the audio processor is running -#if CONFIG_USE_AUDIO_PROCESSOR - if (!audio_processor_.IsRunning()) { -#else - if (true) { -#endif + if (!audio_processor_->IsRunning()) { // Send the start listening command protocol_->SendStartListening(listening_mode_); if (listening_mode_ == kListeningModeAutoStop && previous_state == kDeviceStateSpeaking) { @@ -853,18 +836,14 @@ void Application::SetDeviceState(DeviceState state) { #if CONFIG_USE_WAKE_WORD_DETECT wake_word_detect_.StopDetection(); #endif -#if CONFIG_USE_AUDIO_PROCESSOR - audio_processor_.Start(); -#endif + audio_processor_->Start(); } break; case kDeviceStateSpeaking: display->SetStatus(Lang::Strings::SPEAKING); if (listening_mode_ != kListeningModeRealtime) { -#if CONFIG_USE_AUDIO_PROCESSOR - audio_processor_.Stop(); -#endif + audio_processor_->Stop(); #if CONFIG_USE_WAKE_WORD_DETECT wake_word_detect_.StartDetection(); #endif diff --git a/main/application.h b/main/application.h index a806c346..4291ed77 100644 --- a/main/application.h +++ b/main/application.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -19,13 +20,11 @@ #include "protocol.h" #include "ota.h" #include "background_task.h" +#include "audio_processor.h" #if CONFIG_USE_WAKE_WORD_DETECT #include "wake_word_detect.h" #endif -#if CONFIG_USE_AUDIO_PROCESSOR -#include "audio_processor.h" -#endif #define SCHEDULE_EVENT (1 << 0) #define AUDIO_INPUT_READY_EVENT (1 << 1) @@ -81,9 +80,7 @@ private: #if CONFIG_USE_WAKE_WORD_DETECT WakeWordDetect wake_word_detect_; #endif -#if CONFIG_USE_AUDIO_PROCESSOR - AudioProcessor audio_processor_; -#endif + std::unique_ptr audio_processor_; Ota ota_; std::mutex mutex_; std::list> main_tasks_; diff --git a/main/audio_processing/audio_processor.cc b/main/audio_processing/afe_audio_processor.cc similarity index 83% rename from main/audio_processing/audio_processor.cc rename to main/audio_processing/afe_audio_processor.cc index 9bab939b..30898cab 100644 --- a/main/audio_processing/audio_processor.cc +++ b/main/audio_processing/afe_audio_processor.cc @@ -1,16 +1,16 @@ -#include "audio_processor.h" +#include "afe_audio_processor.h" #include #define PROCESSOR_RUNNING 0x01 -static const char* TAG = "AudioProcessor"; +static const char* TAG = "AfeAudioProcessor"; -AudioProcessor::AudioProcessor() +AfeAudioProcessor::AfeAudioProcessor() : afe_data_(nullptr) { event_group_ = xEventGroupCreate(); } -void AudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) { +void AfeAudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) { codec_ = codec; int ref_num = codec_->input_reference() ? 1 : 0; @@ -51,57 +51,57 @@ void AudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) { afe_data_ = afe_iface_->create_from_config(afe_config); xTaskCreate([](void* arg) { - auto this_ = (AudioProcessor*)arg; + auto this_ = (AfeAudioProcessor*)arg; this_->AudioProcessorTask(); vTaskDelete(NULL); }, "audio_communication", 4096, this, 3, NULL); } -AudioProcessor::~AudioProcessor() { +AfeAudioProcessor::~AfeAudioProcessor() { if (afe_data_ != nullptr) { afe_iface_->destroy(afe_data_); } vEventGroupDelete(event_group_); } -size_t AudioProcessor::GetFeedSize() { +size_t AfeAudioProcessor::GetFeedSize() { if (afe_data_ == nullptr) { return 0; } return afe_iface_->get_feed_chunksize(afe_data_) * codec_->input_channels(); } -void AudioProcessor::Feed(const std::vector& data) { +void AfeAudioProcessor::Feed(const std::vector& data) { if (afe_data_ == nullptr) { return; } afe_iface_->feed(afe_data_, data.data()); } -void AudioProcessor::Start() { +void AfeAudioProcessor::Start() { xEventGroupSetBits(event_group_, PROCESSOR_RUNNING); } -void AudioProcessor::Stop() { +void AfeAudioProcessor::Stop() { xEventGroupClearBits(event_group_, PROCESSOR_RUNNING); if (afe_data_ != nullptr) { afe_iface_->reset_buffer(afe_data_); } } -bool AudioProcessor::IsRunning() { +bool AfeAudioProcessor::IsRunning() { return xEventGroupGetBits(event_group_) & PROCESSOR_RUNNING; } -void AudioProcessor::OnOutput(std::function&& data)> callback) { +void AfeAudioProcessor::OnOutput(std::function&& data)> callback) { output_callback_ = callback; } -void AudioProcessor::OnVadStateChange(std::function callback) { +void AfeAudioProcessor::OnVadStateChange(std::function callback) { vad_state_change_callback_ = callback; } -void AudioProcessor::AudioProcessorTask() { +void AfeAudioProcessor::AudioProcessorTask() { auto fetch_size = afe_iface_->get_fetch_chunksize(afe_data_); auto feed_size = afe_iface_->get_feed_chunksize(afe_data_); ESP_LOGI(TAG, "Audio communication task started, feed size: %d fetch size: %d", @@ -136,4 +136,4 @@ void AudioProcessor::AudioProcessorTask() { output_callback_(std::vector(res->data, res->data + res->data_size / sizeof(int16_t))); } } -} +} \ No newline at end of file diff --git a/main/audio_processing/afe_audio_processor.h b/main/audio_processing/afe_audio_processor.h new file mode 100644 index 00000000..a7aa1d2c --- /dev/null +++ b/main/audio_processing/afe_audio_processor.h @@ -0,0 +1,42 @@ +#ifndef AFE_AUDIO_PROCESSOR_H +#define AFE_AUDIO_PROCESSOR_H + +#include +#include +#include +#include + +#include +#include +#include + +#include "audio_processor.h" +#include "audio_codec.h" + +class AfeAudioProcessor : public AudioProcessor { +public: + AfeAudioProcessor(); + ~AfeAudioProcessor(); + + void Initialize(AudioCodec* codec, bool realtime_chat) override; + void Feed(const std::vector& data) override; + void Start() override; + void Stop() override; + bool IsRunning() override; + void OnOutput(std::function&& data)> callback) override; + void OnVadStateChange(std::function callback) override; + size_t GetFeedSize() override; + +private: + EventGroupHandle_t event_group_ = nullptr; + esp_afe_sr_iface_t* afe_iface_ = nullptr; + esp_afe_sr_data_t* afe_data_ = nullptr; + std::function&& data)> output_callback_; + std::function vad_state_change_callback_; + AudioCodec* codec_ = nullptr; + bool is_speaking_ = false; + + void AudioProcessorTask(); +}; + +#endif \ No newline at end of file diff --git a/main/audio_processing/audio_processor.h b/main/audio_processing/audio_processor.h index 3c2c4b75..048c8f2c 100644 --- a/main/audio_processing/audio_processor.h +++ b/main/audio_processing/audio_processor.h @@ -1,11 +1,6 @@ #ifndef AUDIO_PROCESSOR_H #define AUDIO_PROCESSOR_H -#include -#include -#include -#include - #include #include #include @@ -14,28 +9,16 @@ class AudioProcessor { public: - AudioProcessor(); - ~AudioProcessor(); - - void Initialize(AudioCodec* codec, bool realtime_chat); - void Feed(const std::vector& data); - void Start(); - void Stop(); - bool IsRunning(); - void OnOutput(std::function&& data)> callback); - void OnVadStateChange(std::function callback); - size_t GetFeedSize(); - -private: - EventGroupHandle_t event_group_ = nullptr; - esp_afe_sr_iface_t* afe_iface_ = nullptr; - esp_afe_sr_data_t* afe_data_ = nullptr; - std::function&& data)> output_callback_; - std::function vad_state_change_callback_; - AudioCodec* codec_ = nullptr; - bool is_speaking_ = false; - - void AudioProcessorTask(); + virtual ~AudioProcessor() = default; + + virtual void Initialize(AudioCodec* codec, bool realtime_chat) = 0; + virtual void Feed(const std::vector& data) = 0; + virtual void Start() = 0; + virtual void Stop() = 0; + virtual bool IsRunning() = 0; + virtual void OnOutput(std::function&& data)> callback) = 0; + virtual void OnVadStateChange(std::function callback) = 0; + virtual size_t GetFeedSize() = 0; }; #endif diff --git a/main/audio_processing/dummy_audio_processor.cc b/main/audio_processing/dummy_audio_processor.cc new file mode 100644 index 00000000..01b7ac7d --- /dev/null +++ b/main/audio_processing/dummy_audio_processor.cc @@ -0,0 +1,44 @@ +#include "dummy_audio_processor.h" +#include + +static const char* TAG = "DummyAudioProcessor"; + +void DummyAudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) { + codec_ = codec; +} + +void DummyAudioProcessor::Feed(const std::vector& data) { + if (!is_running_ || !output_callback_) { + return; + } + // 直接将输入数据传递给输出回调 + output_callback_(std::vector(data)); +} + +void DummyAudioProcessor::Start() { + is_running_ = true; +} + +void DummyAudioProcessor::Stop() { + is_running_ = false; +} + +bool DummyAudioProcessor::IsRunning() { + return is_running_; +} + +void DummyAudioProcessor::OnOutput(std::function&& data)> callback) { + output_callback_ = callback; +} + +void DummyAudioProcessor::OnVadStateChange(std::function callback) { + vad_state_change_callback_ = callback; +} + +size_t DummyAudioProcessor::GetFeedSize() { + if (!codec_) { + return 0; + } + // 返回一个固定的帧大小,比如 30ms 的数据 + return 30 * codec_->input_sample_rate() / 1000; +} \ No newline at end of file diff --git a/main/audio_processing/dummy_audio_processor.h b/main/audio_processing/dummy_audio_processor.h new file mode 100644 index 00000000..0d226931 --- /dev/null +++ b/main/audio_processing/dummy_audio_processor.h @@ -0,0 +1,31 @@ +#ifndef DUMMY_AUDIO_PROCESSOR_H +#define DUMMY_AUDIO_PROCESSOR_H + +#include +#include + +#include "audio_processor.h" +#include "audio_codec.h" + +class DummyAudioProcessor : public AudioProcessor { +public: + DummyAudioProcessor() = default; + ~DummyAudioProcessor() = default; + + void Initialize(AudioCodec* codec, bool realtime_chat) override; + void Feed(const std::vector& data) override; + void Start() override; + void Stop() override; + bool IsRunning() override; + void OnOutput(std::function&& data)> callback) override; + void OnVadStateChange(std::function callback) override; + size_t GetFeedSize() override; + +private: + AudioCodec* codec_ = nullptr; + std::function&& data)> output_callback_; + std::function vad_state_change_callback_; + bool is_running_ = false; +}; + +#endif \ No newline at end of file