forked from xiaozhi/xiaozhi-esp32
Fix Server AEC
This commit is contained in:
@@ -10,8 +10,14 @@ AfeAudioProcessor::AfeAudioProcessor()
|
||||
event_group_ = xEventGroupCreate();
|
||||
}
|
||||
|
||||
void AfeAudioProcessor::Initialize(AudioCodec* codec) {
|
||||
void AfeAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) {
|
||||
codec_ = codec;
|
||||
frame_duration_ms_ = frame_duration_ms;
|
||||
frame_samples_ = frame_duration_ms_ * codec_->input_sample_rate() / 1000;
|
||||
|
||||
// Pre-allocate output buffer capacity
|
||||
output_buffer_.reserve(frame_samples_);
|
||||
|
||||
int ref_num = codec_->input_reference() ? 1 : 0;
|
||||
|
||||
std::string input_format;
|
||||
@@ -79,7 +85,7 @@ size_t AfeAudioProcessor::GetFeedSize() {
|
||||
return afe_iface_->get_feed_chunksize(afe_data_) * codec_->input_channels();
|
||||
}
|
||||
|
||||
void AfeAudioProcessor::Feed(const std::vector<int16_t>& data) {
|
||||
void AfeAudioProcessor::Feed(std::vector<int16_t>&& data) {
|
||||
if (afe_data_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
@@ -141,7 +147,24 @@ void AfeAudioProcessor::AudioProcessorTask() {
|
||||
}
|
||||
|
||||
if (output_callback_) {
|
||||
output_callback_(std::vector<int16_t>(res->data, res->data + res->data_size / sizeof(int16_t)));
|
||||
size_t samples = res->data_size / sizeof(int16_t);
|
||||
|
||||
// Add data to buffer
|
||||
output_buffer_.insert(output_buffer_.end(), res->data, res->data + samples);
|
||||
|
||||
// Output complete frames when buffer has enough data
|
||||
while (output_buffer_.size() >= frame_samples_) {
|
||||
if (output_buffer_.size() == frame_samples_) {
|
||||
// If buffer size equals frame size, move the entire buffer
|
||||
output_callback_(std::move(output_buffer_));
|
||||
output_buffer_.clear();
|
||||
output_buffer_.reserve(frame_samples_);
|
||||
} else {
|
||||
// If buffer size exceeds frame size, copy one frame and remove it
|
||||
output_callback_(std::vector<int16_t>(output_buffer_.begin(), output_buffer_.begin() + frame_samples_));
|
||||
output_buffer_.erase(output_buffer_.begin(), output_buffer_.begin() + frame_samples_);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,8 +18,8 @@ public:
|
||||
AfeAudioProcessor();
|
||||
~AfeAudioProcessor();
|
||||
|
||||
void Initialize(AudioCodec* codec) override;
|
||||
void Feed(const std::vector<int16_t>& data) override;
|
||||
void Initialize(AudioCodec* codec, int frame_duration_ms) override;
|
||||
void Feed(std::vector<int16_t>&& data) override;
|
||||
void Start() override;
|
||||
void Stop() override;
|
||||
bool IsRunning() override;
|
||||
@@ -35,7 +35,10 @@ private:
|
||||
std::function<void(std::vector<int16_t>&& data)> output_callback_;
|
||||
std::function<void(bool speaking)> vad_state_change_callback_;
|
||||
AudioCodec* codec_ = nullptr;
|
||||
int frame_duration_ms_ = 0;
|
||||
int frame_samples_ = 0;
|
||||
bool is_speaking_ = false;
|
||||
std::vector<int16_t> output_buffer_;
|
||||
|
||||
void AudioProcessorTask();
|
||||
};
|
||||
|
||||
@@ -3,16 +3,32 @@
|
||||
|
||||
#define TAG "NoAudioProcessor"
|
||||
|
||||
void NoAudioProcessor::Initialize(AudioCodec* codec) {
|
||||
codec_ = codec;
|
||||
void NoAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) :
|
||||
codec_(codec),
|
||||
frame_duration_ms_(frame_duration_ms) {
|
||||
frame_samples_ = frame_duration_ms_ * codec_->input_sample_rate() / 1000;
|
||||
}
|
||||
|
||||
void NoAudioProcessor::Feed(const std::vector<int16_t>& data) {
|
||||
void NoAudioProcessor::Feed(std::vector<int16_t>&& data) {
|
||||
if (!is_running_ || !output_callback_) {
|
||||
return;
|
||||
}
|
||||
// 直接将输入数据传递给输出回调
|
||||
output_callback_(std::vector<int16_t>(data));
|
||||
|
||||
if (data.size() != frame_samples_) {
|
||||
ESP_LOGE(TAG, "Feed data size is not equal to frame size, feed size: %u, frame size: %u", data.size(), frame_samples_);
|
||||
return;
|
||||
}
|
||||
|
||||
if (codec_->input_channels() == 2) {
|
||||
// If input channels is 2, we need to fetch the left channel data
|
||||
auto mono_data = std::vector<int16_t>(data.size() / 2);
|
||||
for (size_t i = 0, j = 0; i < mono_data.size(); ++i, j += 2) {
|
||||
mono_data[i] = data[j];
|
||||
}
|
||||
output_callback_(std::move(mono_data));
|
||||
} else {
|
||||
output_callback_(std::move(data));
|
||||
}
|
||||
}
|
||||
|
||||
void NoAudioProcessor::Start() {
|
||||
@@ -39,8 +55,7 @@ size_t NoAudioProcessor::GetFeedSize() {
|
||||
if (!codec_) {
|
||||
return 0;
|
||||
}
|
||||
// 返回一个固定的帧大小,比如 30ms 的数据
|
||||
return 30 * codec_->input_sample_rate() / 1000;
|
||||
return frame_samples_;
|
||||
}
|
||||
|
||||
void NoAudioProcessor::EnableDeviceAec(bool enable) {
|
||||
|
||||
@@ -12,8 +12,8 @@ public:
|
||||
NoAudioProcessor() = default;
|
||||
~NoAudioProcessor() = default;
|
||||
|
||||
void Initialize(AudioCodec* codec) override;
|
||||
void Feed(const std::vector<int16_t>& data) override;
|
||||
void Initialize(AudioCodec* codec, int frame_duration_ms) override;
|
||||
void Feed(std::vector<int16_t>&& data) override;
|
||||
void Start() override;
|
||||
void Stop() override;
|
||||
bool IsRunning() override;
|
||||
@@ -24,6 +24,8 @@ public:
|
||||
|
||||
private:
|
||||
AudioCodec* codec_ = nullptr;
|
||||
int frame_duration_ms_ = 0;
|
||||
int frame_samples_ = 0;
|
||||
std::function<void(std::vector<int16_t>&& data)> output_callback_;
|
||||
std::function<void(bool speaking)> vad_state_change_callback_;
|
||||
bool is_running_ = false;
|
||||
|
||||
Reference in New Issue
Block a user