Add dummy audio processor

This commit is contained in:
Terrence
2025-04-29 18:17:08 +08:00
parent c201e5955f
commit f6334246ca
8 changed files with 171 additions and 93 deletions

View File

@@ -157,7 +157,9 @@ file(GLOB BOARD_SOURCES
list(APPEND SOURCES ${BOARD_SOURCES})
if(CONFIG_USE_AUDIO_PROCESSOR)
list(APPEND SOURCES "audio_processing/audio_processor.cc")
list(APPEND SOURCES "audio_processing/afe_audio_processor.cc")
else()
list(APPEND SOURCES "audio_processing/dummy_audio_processor.cc")
endif()
if(CONFIG_USE_WAKE_WORD_DETECT)
list(APPEND SOURCES "audio_processing/wake_word_detect.cc")

View File

@@ -10,6 +10,12 @@
#include "iot/thing_manager.h"
#include "assets/lang_config.h"
#if CONFIG_USE_AUDIO_PROCESSOR
#include "afe_audio_processor.h"
#else
#include "dummy_audio_processor.h"
#endif
#include <cstring>
#include <esp_log.h>
#include <cJSON.h>
@@ -37,6 +43,12 @@ Application::Application() {
event_group_ = xEventGroupCreate();
background_task_ = new BackgroundTask(4096 * 8);
#if CONFIG_USE_AUDIO_PROCESSOR
audio_processor_ = std::make_unique<AfeAudioProcessor>();
#else
audio_processor_ = std::make_unique<DummyAudioProcessor>();
#endif
esp_timer_create_args_t clock_timer_args = {
.callback = [](void* arg) {
Application* app = (Application*)arg;
@@ -502,9 +514,8 @@ void Application::Start() {
});
bool protocol_started = protocol_->Start();
#if CONFIG_USE_AUDIO_PROCESSOR
audio_processor_.Initialize(codec, realtime_chat_enabled_);
audio_processor_.OnOutput([this](std::vector<int16_t>&& data) {
audio_processor_->Initialize(codec, realtime_chat_enabled_);
audio_processor_->OnOutput([this](std::vector<int16_t>&& data) {
background_task_->Schedule([this, data = std::move(data)]() mutable {
if (protocol_->IsAudioChannelBusy()) {
return;
@@ -520,7 +531,7 @@ void Application::Start() {
});
});
});
audio_processor_.OnVadStateChange([this](bool speaking) {
audio_processor_->OnVadStateChange([this](bool speaking) {
if (device_state_ == kDeviceStateListening) {
Schedule([this, speaking]() {
if (speaking) {
@@ -533,7 +544,6 @@ void Application::Start() {
});
}
});
#endif
#if CONFIG_USE_WAKE_WORD_DETECT
wake_word_detect_.Initialize(codec);
@@ -716,37 +726,16 @@ void Application::OnAudioInput() {
}
}
#endif
#if CONFIG_USE_AUDIO_PROCESSOR
if (audio_processor_.IsRunning()) {
if (audio_processor_->IsRunning()) {
std::vector<int16_t> data;
int samples = audio_processor_.GetFeedSize();
int samples = audio_processor_->GetFeedSize();
if (samples > 0) {
ReadAudio(data, 16000, samples);
audio_processor_.Feed(data);
audio_processor_->Feed(data);
return;
}
}
#else
if (device_state_ == kDeviceStateListening) {
std::vector<int16_t> data;
ReadAudio(data, 16000, 30 * 16000 / 1000);
background_task_->Schedule([this, data = std::move(data)]() mutable {
if (protocol_->IsAudioChannelBusy()) {
return;
}
opus_encoder_->Encode(std::move(data), [this](std::vector<uint8_t>&& opus) {
AudioStreamPacket packet;
packet.payload = std::move(opus);
packet.timestamp = last_output_timestamp_;
last_output_timestamp_ = 0;
Schedule([this, packet = std::move(packet)]() {
protocol_->SendAudio(packet);
});
});
});
return;
}
#endif
vTaskDelay(pdMS_TO_TICKS(30));
}
@@ -818,9 +807,7 @@ void Application::SetDeviceState(DeviceState state) {
case kDeviceStateIdle:
display->SetStatus(Lang::Strings::STANDBY);
display->SetEmotion("neutral");
#if CONFIG_USE_AUDIO_PROCESSOR
audio_processor_.Stop();
#endif
audio_processor_->Stop();
#if CONFIG_USE_WAKE_WORD_DETECT
wake_word_detect_.StartDetection();
#endif
@@ -838,11 +825,7 @@ void Application::SetDeviceState(DeviceState state) {
UpdateIotStates();
// Make sure the audio processor is running
#if CONFIG_USE_AUDIO_PROCESSOR
if (!audio_processor_.IsRunning()) {
#else
if (true) {
#endif
if (!audio_processor_->IsRunning()) {
// Send the start listening command
protocol_->SendStartListening(listening_mode_);
if (listening_mode_ == kListeningModeAutoStop && previous_state == kDeviceStateSpeaking) {
@@ -853,18 +836,14 @@ void Application::SetDeviceState(DeviceState state) {
#if CONFIG_USE_WAKE_WORD_DETECT
wake_word_detect_.StopDetection();
#endif
#if CONFIG_USE_AUDIO_PROCESSOR
audio_processor_.Start();
#endif
audio_processor_->Start();
}
break;
case kDeviceStateSpeaking:
display->SetStatus(Lang::Strings::SPEAKING);
if (listening_mode_ != kListeningModeRealtime) {
#if CONFIG_USE_AUDIO_PROCESSOR
audio_processor_.Stop();
#endif
audio_processor_->Stop();
#if CONFIG_USE_WAKE_WORD_DETECT
wake_word_detect_.StartDetection();
#endif

View File

@@ -11,6 +11,7 @@
#include <list>
#include <vector>
#include <condition_variable>
#include <memory>
#include <opus_encoder.h>
#include <opus_decoder.h>
@@ -19,13 +20,11 @@
#include "protocol.h"
#include "ota.h"
#include "background_task.h"
#include "audio_processor.h"
#if CONFIG_USE_WAKE_WORD_DETECT
#include "wake_word_detect.h"
#endif
#if CONFIG_USE_AUDIO_PROCESSOR
#include "audio_processor.h"
#endif
#define SCHEDULE_EVENT (1 << 0)
#define AUDIO_INPUT_READY_EVENT (1 << 1)
@@ -81,9 +80,7 @@ private:
#if CONFIG_USE_WAKE_WORD_DETECT
WakeWordDetect wake_word_detect_;
#endif
#if CONFIG_USE_AUDIO_PROCESSOR
AudioProcessor audio_processor_;
#endif
std::unique_ptr<AudioProcessor> audio_processor_;
Ota ota_;
std::mutex mutex_;
std::list<std::function<void()>> main_tasks_;

View File

@@ -1,16 +1,16 @@
#include "audio_processor.h"
#include "afe_audio_processor.h"
#include <esp_log.h>
#define PROCESSOR_RUNNING 0x01
static const char* TAG = "AudioProcessor";
static const char* TAG = "AfeAudioProcessor";
AudioProcessor::AudioProcessor()
AfeAudioProcessor::AfeAudioProcessor()
: afe_data_(nullptr) {
event_group_ = xEventGroupCreate();
}
void AudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) {
void AfeAudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) {
codec_ = codec;
int ref_num = codec_->input_reference() ? 1 : 0;
@@ -51,57 +51,57 @@ void AudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) {
afe_data_ = afe_iface_->create_from_config(afe_config);
xTaskCreate([](void* arg) {
auto this_ = (AudioProcessor*)arg;
auto this_ = (AfeAudioProcessor*)arg;
this_->AudioProcessorTask();
vTaskDelete(NULL);
}, "audio_communication", 4096, this, 3, NULL);
}
AudioProcessor::~AudioProcessor() {
AfeAudioProcessor::~AfeAudioProcessor() {
if (afe_data_ != nullptr) {
afe_iface_->destroy(afe_data_);
}
vEventGroupDelete(event_group_);
}
size_t AudioProcessor::GetFeedSize() {
size_t AfeAudioProcessor::GetFeedSize() {
if (afe_data_ == nullptr) {
return 0;
}
return afe_iface_->get_feed_chunksize(afe_data_) * codec_->input_channels();
}
void AudioProcessor::Feed(const std::vector<int16_t>& data) {
void AfeAudioProcessor::Feed(const std::vector<int16_t>& data) {
if (afe_data_ == nullptr) {
return;
}
afe_iface_->feed(afe_data_, data.data());
}
void AudioProcessor::Start() {
void AfeAudioProcessor::Start() {
xEventGroupSetBits(event_group_, PROCESSOR_RUNNING);
}
void AudioProcessor::Stop() {
void AfeAudioProcessor::Stop() {
xEventGroupClearBits(event_group_, PROCESSOR_RUNNING);
if (afe_data_ != nullptr) {
afe_iface_->reset_buffer(afe_data_);
}
}
bool AudioProcessor::IsRunning() {
bool AfeAudioProcessor::IsRunning() {
return xEventGroupGetBits(event_group_) & PROCESSOR_RUNNING;
}
void AudioProcessor::OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) {
void AfeAudioProcessor::OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) {
output_callback_ = callback;
}
void AudioProcessor::OnVadStateChange(std::function<void(bool speaking)> callback) {
void AfeAudioProcessor::OnVadStateChange(std::function<void(bool speaking)> callback) {
vad_state_change_callback_ = callback;
}
void AudioProcessor::AudioProcessorTask() {
void AfeAudioProcessor::AudioProcessorTask() {
auto fetch_size = afe_iface_->get_fetch_chunksize(afe_data_);
auto feed_size = afe_iface_->get_feed_chunksize(afe_data_);
ESP_LOGI(TAG, "Audio communication task started, feed size: %d fetch size: %d",
@@ -136,4 +136,4 @@ void AudioProcessor::AudioProcessorTask() {
output_callback_(std::vector<int16_t>(res->data, res->data + res->data_size / sizeof(int16_t)));
}
}
}
}

View File

@@ -0,0 +1,42 @@
#ifndef AFE_AUDIO_PROCESSOR_H
#define AFE_AUDIO_PROCESSOR_H
#include <esp_afe_sr_models.h>
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <freertos/event_groups.h>
#include <string>
#include <vector>
#include <functional>
#include "audio_processor.h"
#include "audio_codec.h"
class AfeAudioProcessor : public AudioProcessor {
public:
AfeAudioProcessor();
~AfeAudioProcessor();
void Initialize(AudioCodec* codec, bool realtime_chat) override;
void Feed(const std::vector<int16_t>& data) override;
void Start() override;
void Stop() override;
bool IsRunning() override;
void OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) override;
void OnVadStateChange(std::function<void(bool speaking)> callback) override;
size_t GetFeedSize() override;
private:
EventGroupHandle_t event_group_ = nullptr;
esp_afe_sr_iface_t* afe_iface_ = nullptr;
esp_afe_sr_data_t* afe_data_ = nullptr;
std::function<void(std::vector<int16_t>&& data)> output_callback_;
std::function<void(bool speaking)> vad_state_change_callback_;
AudioCodec* codec_ = nullptr;
bool is_speaking_ = false;
void AudioProcessorTask();
};
#endif

View File

@@ -1,11 +1,6 @@
#ifndef AUDIO_PROCESSOR_H
#define AUDIO_PROCESSOR_H
#include <esp_afe_sr_models.h>
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#include <freertos/event_groups.h>
#include <string>
#include <vector>
#include <functional>
@@ -14,28 +9,16 @@
class AudioProcessor {
public:
AudioProcessor();
~AudioProcessor();
void Initialize(AudioCodec* codec, bool realtime_chat);
void Feed(const std::vector<int16_t>& data);
void Start();
void Stop();
bool IsRunning();
void OnOutput(std::function<void(std::vector<int16_t>&& data)> callback);
void OnVadStateChange(std::function<void(bool speaking)> callback);
size_t GetFeedSize();
private:
EventGroupHandle_t event_group_ = nullptr;
esp_afe_sr_iface_t* afe_iface_ = nullptr;
esp_afe_sr_data_t* afe_data_ = nullptr;
std::function<void(std::vector<int16_t>&& data)> output_callback_;
std::function<void(bool speaking)> vad_state_change_callback_;
AudioCodec* codec_ = nullptr;
bool is_speaking_ = false;
void AudioProcessorTask();
virtual ~AudioProcessor() = default;
virtual void Initialize(AudioCodec* codec, bool realtime_chat) = 0;
virtual void Feed(const std::vector<int16_t>& data) = 0;
virtual void Start() = 0;
virtual void Stop() = 0;
virtual bool IsRunning() = 0;
virtual void OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) = 0;
virtual void OnVadStateChange(std::function<void(bool speaking)> callback) = 0;
virtual size_t GetFeedSize() = 0;
};
#endif

View File

@@ -0,0 +1,44 @@
#include "dummy_audio_processor.h"
#include <esp_log.h>
#include <utility>
// Component name for log output (presumably for the ESP_LOGx macros from
// <esp_log.h>); none of the functions in this file reference it yet.
static const char* TAG = "DummyAudioProcessor";
/**
 * Remembers the codec so GetFeedSize() can query its input sample rate.
 *
 * @param codec          Audio codec to read the sample rate from (not owned).
 * @param realtime_chat  Required by the AudioProcessor interface; unused here.
 */
void DummyAudioProcessor::Initialize(AudioCodec* codec, bool realtime_chat) {
    static_cast<void>(realtime_chat);
    this->codec_ = codec;
}
/**
 * Forwards a copy of the input samples to the output callback.
 *
 * Does nothing unless the processor is running and an output callback has
 * been registered.
 *
 * @param data Input samples to pass through unchanged.
 */
void DummyAudioProcessor::Feed(const std::vector<int16_t>& data) {
    const bool can_forward = is_running_ && output_callback_;
    if (can_forward) {
        // Pass the input straight through: the callback takes ownership of a fresh copy.
        output_callback_(std::vector<int16_t>(data.begin(), data.end()));
    }
}
void DummyAudioProcessor::Start() {
is_running_ = true;
}
void DummyAudioProcessor::Stop() {
is_running_ = false;
}
bool DummyAudioProcessor::IsRunning() {
return is_running_;
}
/**
 * Registers the receiver for audio frames passed to Feed().
 *
 * @param callback Called with each forwarded frame. While no callback is set
 *                 (or an empty one is passed), Feed() discards its input.
 */
void DummyAudioProcessor::OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) {
    // The by-value parameter is already our own copy; move it into the member
    // rather than copying the std::function a second time.
    output_callback_ = std::move(callback);
}
/**
 * Stores the voice-activity callback.
 *
 * The callback is only stored: no DummyAudioProcessor method in this file
 * invokes it.
 *
 * @param callback Receiver for speaking / not-speaking transitions.
 */
void DummyAudioProcessor::OnVadStateChange(std::function<void(bool speaking)> callback) {
    // Move the by-value parameter into place instead of copying it again.
    vad_state_change_callback_ = std::move(callback);
}
/**
 * Reports how many samples the caller should supply per Feed() call.
 *
 * @return 0 when no codec has been set via Initialize(); otherwise the number
 *         of samples in a fixed 30 ms frame at the codec's input sample rate.
 */
size_t DummyAudioProcessor::GetFeedSize() {
    if (codec_ == nullptr) {
        return 0;
    }
    // Use a fixed frame length: 30 ms worth of samples.
    constexpr int kFrameDurationMs = 30;
    return kFrameDurationMs * codec_->input_sample_rate() / 1000;
}

View File

@@ -0,0 +1,31 @@
#ifndef DUMMY_AUDIO_PROCESSOR_H
#define DUMMY_AUDIO_PROCESSOR_H
#include <vector>
#include <functional>
#include "audio_processor.h"
#include "audio_codec.h"
class DummyAudioProcessor : public AudioProcessor {
public:
DummyAudioProcessor() = default;
~DummyAudioProcessor() = default;
void Initialize(AudioCodec* codec, bool realtime_chat) override;
void Feed(const std::vector<int16_t>& data) override;
void Start() override;
void Stop() override;
bool IsRunning() override;
void OnOutput(std::function<void(std::vector<int16_t>&& data)> callback) override;
void OnVadStateChange(std::function<void(bool speaking)> callback) override;
size_t GetFeedSize() override;
private:
AudioCodec* codec_ = nullptr;
std::function<void(std::vector<int16_t>&& data)> output_callback_;
std::function<void(bool speaking)> vad_state_change_callback_;
bool is_running_ = false;
};
#endif