forked from xiaozhi/xiaozhi-esp32
Fix frame samples for server AEC

@@ -36,7 +36,9 @@ static const char* const STATE_STRINGS[] = {
 Application::Application() {
     event_group_ = xEventGroupCreate();
 
-#if CONFIG_USE_DEVICE_AEC
+#if CONFIG_USE_DEVICE_AEC && CONFIG_USE_SERVER_AEC
+#error "CONFIG_USE_DEVICE_AEC and CONFIG_USE_SERVER_AEC cannot be enabled at the same time"
+#elif CONFIG_USE_DEVICE_AEC
     aec_mode_ = kAecOnDeviceSide;
 #elif CONFIG_USE_SERVER_AEC
     aec_mode_ = kAecOnServerSide;

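If both Kconfig options are enabled at the same time, the build now stops at the #error directive instead of silently picking one mode. A minimal sketch of the resulting selection chain in the constructor; the trailing #else branch and the kAecOff enumerator are assumptions for illustration and do not appear in the hunk:

// Sketch of the compile-time AEC mode selection after the change above.
// The #else branch and kAecOff are assumed; the guard and the two #elif
// branches come from the hunk.
#if CONFIG_USE_DEVICE_AEC && CONFIG_USE_SERVER_AEC
#error "CONFIG_USE_DEVICE_AEC and CONFIG_USE_SERVER_AEC cannot be enabled at the same time"
#elif CONFIG_USE_DEVICE_AEC
    aec_mode_ = kAecOnDeviceSide;
#elif CONFIG_USE_SERVER_AEC
    aec_mode_ = kAecOnServerSide;
#else
    aec_mode_ = kAecOff;  // assumed default when neither option is set
#endif
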
@@ -288,11 +288,13 @@ void AudioService::AudioOutputTask() {
         last_output_time_ = std::chrono::steady_clock::now();
         debug_statistics_.playback_count++;
 
-        /* Record the timestamp */
+#if CONFIG_USE_SERVER_AEC
+        /* Record the timestamp for server AEC */
         if (task->timestamp > 0) {
             lock.lock();
             timestamp_queue_.push_back(task->timestamp);
         }
+#endif
     }
 
     ESP_LOGW(TAG, "Audio output task stopped");

@@ -405,7 +407,7 @@ void AudioService::PushTaskToEncodeQueue(AudioTaskType type, std::vector<int16_t
         if (timestamp_queue_.size() <= MAX_TIMESTAMPS_IN_QUEUE) {
             task->timestamp = timestamp_queue_.front();
         } else {
-            ESP_LOGW(TAG, "Timestamp queue is full, dropping timestamp");
+            ESP_LOGW(TAG, "Timestamp queue (%u) is full, dropping timestamp", timestamp_queue_.size());
         }
         timestamp_queue_.pop_front();
     }

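Together with the output-task hunk above, this pairs a playback timestamp with each microphone frame pushed to the encoder, so a server-side AEC can align the far-end (speaker) reference with the near-end (mic) capture. A self-contained sketch of that producer/consumer pattern, assuming millisecond timestamps; TimestampPairing and kMaxTimestamps are hypothetical names modeled on the diff, not identifiers from the source:

#include <cstddef>
#include <cstdint>
#include <deque>
#include <mutex>

class TimestampPairing {
public:
    // Called from the audio output path after a chunk has been played.
    void OnPlayback(uint32_t timestamp_ms) {
        std::lock_guard<std::mutex> lock(mutex_);
        timestamps_.push_back(timestamp_ms);
    }

    // Called when a microphone frame is queued for encoding; returns the
    // oldest playback timestamp, or 0 if none is available.
    uint32_t TakeForEncode() {
        std::lock_guard<std::mutex> lock(mutex_);
        if (timestamps_.empty()) {
            return 0;
        }
        uint32_t ts = 0;
        if (timestamps_.size() <= kMaxTimestamps) {
            ts = timestamps_.front();
        }  // otherwise drop it, mirroring the "queue is full" warning above
        timestamps_.pop_front();
        return ts;
    }

private:
    static constexpr size_t kMaxTimestamps = 3;  // assumed value
    std::mutex mutex_;
    std::deque<uint32_t> timestamps_;
};
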
@@ -12,8 +12,7 @@ AfeAudioProcessor::AfeAudioProcessor()
 
 void AfeAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) {
     codec_ = codec;
-    frame_duration_ms_ = frame_duration_ms;
-    frame_samples_ = frame_duration_ms_ * codec_->input_sample_rate() / 1000;
+    frame_samples_ = frame_duration_ms * 16000 / 1000;
 
     // Pre-allocate output buffer capacity
     output_buffer_.reserve(frame_samples_);

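This hunk is the fix the commit title refers to: frame size is now derived from a fixed 16 kHz rate rather than the codec's input sample rate, presumably because the AFE pipeline feeding server AEC always consumes 16 kHz audio. A small worked example of the new formula (the duration values are illustrative):

// frame_samples = frame_duration_ms * 16000 / 1000
// e.g. 60 ms -> 960 samples, 20 ms -> 320 samples.
// Under the old formula, a codec with a 24 kHz or 48 kHz input rate would
// have yielded 1440 or 2880 samples for 60 ms, which no longer matches a
// 16 kHz processing pipeline.
constexpr int FrameSamplesAt16k(int frame_duration_ms) {
    return frame_duration_ms * 16000 / 1000;
}
static_assert(FrameSamplesAt16k(60) == 960, "60 ms at 16 kHz is 960 samples");
static_assert(FrameSamplesAt16k(20) == 320, "20 ms at 16 kHz is 320 samples");
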
@@ -35,7 +35,6 @@ private:
     std::function<void(std::vector<int16_t>&& data)> output_callback_;
     std::function<void(bool speaking)> vad_state_change_callback_;
     AudioCodec* codec_ = nullptr;
-    int frame_duration_ms_ = 0;
     int frame_samples_ = 0;
     bool is_speaking_ = false;
     std::vector<int16_t> output_buffer_;

@@ -3,10 +3,9 @@
 
 #define TAG "NoAudioProcessor"
 
-void NoAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) :
-    codec_(codec),
-    frame_duration_ms_(frame_duration_ms) {
-    frame_samples_ = frame_duration_ms_ * codec_->input_sample_rate() / 1000;
+void NoAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) {
+    codec_ = codec;
+    frame_samples_ = frame_duration_ms * 16000 / 1000;
 }
 
 void NoAudioProcessor::Feed(std::vector<int16_t>&& data) {

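The same 16 kHz formula is applied here, and the declaration itself is repaired: the old code used a constructor-style member-initializer list on Initialize(), which is only valid on constructors and would not compile. A minimal illustration with hypothetical names (Example, Init):

struct Example {
    int value_ = 0;
    // Example() : value_(1) {}        // OK: a constructor may use an init list
    // void Init(int v) : value_(v) {} // error: Init is not a constructor
    void Init(int v) { value_ = v; }   // OK: assign in the body instead
};
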
@@ -24,7 +24,6 @@ public:
 
 private:
     AudioCodec* codec_ = nullptr;
-    int frame_duration_ms_ = 0;
     int frame_samples_ = 0;
     std::function<void(std::vector<int16_t>&& data)> output_callback_;
     std::function<void(bool speaking)> vad_state_change_callback_;

@@ -14,7 +14,7 @@ dependencies:
   espressif/esp_lcd_panel_io_additions: ^1.0.1
   78/esp_lcd_nv3023: ~1.0.0
   78/esp-wifi-connect: ~2.4.3
-  78/esp-opus-encoder: ~2.3.3
+  78/esp-opus-encoder: ~2.4.0
   78/esp-ml307: ~3.0.2
   78/xiaozhi-fonts: ~1.3.2
   espressif/led_strip: ^2.5.5