forked from xiaozhi/xiaozhi-esp32
separate encoder for detect word
This commit is contained in:
@@ -277,10 +277,14 @@ void Application::EncodeWakeWordData() {
|
|||||||
}
|
}
|
||||||
wake_word_encode_task_ = xTaskCreateStatic([](void* arg) {
|
wake_word_encode_task_ = xTaskCreateStatic([](void* arg) {
|
||||||
Application* app = (Application*)arg;
|
Application* app = (Application*)arg;
|
||||||
|
auto start_time = esp_timer_get_time();
|
||||||
// encode detect packets
|
// encode detect packets
|
||||||
for (auto& pcm : app->wake_word_pcm_) {
|
OpusEncoder* encoder = new OpusEncoder();
|
||||||
app->opus_encoder_.Encode(pcm, [app](const iovec opus) {
|
encoder->Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, 1, 60);
|
||||||
// append the opus data to the packet
|
encoder->SetComplexity(2);
|
||||||
|
|
||||||
|
for (auto& pcm: app->wake_word_pcm_) {
|
||||||
|
encoder->Encode(pcm, [app](const iovec opus) {
|
||||||
iovec iov = {
|
iovec iov = {
|
||||||
.iov_base = heap_caps_malloc(opus.iov_len, MALLOC_CAP_SPIRAM),
|
.iov_base = heap_caps_malloc(opus.iov_len, MALLOC_CAP_SPIRAM),
|
||||||
.iov_len = opus.iov_len
|
.iov_len = opus.iov_len
|
||||||
@@ -288,10 +292,14 @@ void Application::EncodeWakeWordData() {
|
|||||||
memcpy(iov.iov_base, opus.iov_base, opus.iov_len);
|
memcpy(iov.iov_base, opus.iov_base, opus.iov_len);
|
||||||
app->wake_word_opus_.push_back(iov);
|
app->wake_word_opus_.push_back(iov);
|
||||||
});
|
});
|
||||||
free(pcm.iov_base);
|
heap_caps_free(pcm.iov_base);
|
||||||
}
|
}
|
||||||
app->wake_word_pcm_.clear();
|
app->wake_word_pcm_.clear();
|
||||||
|
|
||||||
|
auto end_time = esp_timer_get_time();
|
||||||
|
ESP_LOGI(TAG, "Encode wake word data opus packets: %d in %lld ms", app->wake_word_opus_.size(), (end_time - start_time) / 1000);
|
||||||
xEventGroupSetBits(app->event_group_, DETECT_PACKETS_ENCODED);
|
xEventGroupSetBits(app->event_group_, DETECT_PACKETS_ENCODED);
|
||||||
|
delete encoder;
|
||||||
vTaskDelete(NULL);
|
vTaskDelete(NULL);
|
||||||
}, "encode_detect_packets", 4096 * 8, this, 1, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_);
|
}, "encode_detect_packets", 4096 * 8, this, 1, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_);
|
||||||
}
|
}
|
||||||
@@ -333,7 +341,7 @@ void Application::AudioDetectionTask() {
|
|||||||
StartWebSocketClient();
|
StartWebSocketClient();
|
||||||
|
|
||||||
// Here the websocket is done, and we also wait for the wake word data to be encoded
|
// Here the websocket is done, and we also wait for the wake word data to be encoded
|
||||||
xEventGroupWaitBits(event_group_, DETECT_PACKETS_ENCODED, pdFALSE, pdTRUE, portMAX_DELAY);
|
xEventGroupWaitBits(event_group_, DETECT_PACKETS_ENCODED, pdTRUE, pdTRUE, portMAX_DELAY);
|
||||||
|
|
||||||
std::lock_guard<std::recursive_mutex> lock(mutex_);
|
std::lock_guard<std::recursive_mutex> lock(mutex_);
|
||||||
if (ws_client_ && ws_client_->IsConnected()) {
|
if (ws_client_ && ws_client_->IsConnected()) {
|
||||||
@@ -341,6 +349,7 @@ void Application::AudioDetectionTask() {
|
|||||||
SendWakeWordData();
|
SendWakeWordData();
|
||||||
// Send a ready message to indicate the server that the wake word data is sent
|
// Send a ready message to indicate the server that the wake word data is sent
|
||||||
SetChatState(kChatStateWakeWordDetected);
|
SetChatState(kChatStateWakeWordDetected);
|
||||||
|
opus_encoder_.ResetState();
|
||||||
// If connected, the hello message is already sent, so we can start communication
|
// If connected, the hello message is already sent, so we can start communication
|
||||||
xEventGroupSetBits(event_group_, COMMUNICATION_RUNNING);
|
xEventGroupSetBits(event_group_, COMMUNICATION_RUNNING);
|
||||||
|
|
||||||
@@ -405,11 +414,6 @@ void Application::AudioEncodeTask() {
|
|||||||
iovec pcm;
|
iovec pcm;
|
||||||
xQueueReceive(audio_encode_queue_, &pcm, portMAX_DELAY);
|
xQueueReceive(audio_encode_queue_, &pcm, portMAX_DELAY);
|
||||||
|
|
||||||
if (pcm.iov_len == 0) {
|
|
||||||
ESP_LOGE(TAG, "Empty audio data");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Encode audio data
|
// Encode audio data
|
||||||
opus_encoder_.Encode(pcm, [this](const iovec opus) {
|
opus_encoder_.Encode(pcm, [this](const iovec opus) {
|
||||||
std::lock_guard<std::recursive_mutex> lock(mutex_);
|
std::lock_guard<std::recursive_mutex> lock(mutex_);
|
||||||
|
|||||||
@@ -28,13 +28,18 @@ void OpusEncoder::Configure(int sample_rate, int channels, int duration_ms) {
|
|||||||
|
|
||||||
// Set DTX
|
// Set DTX
|
||||||
opus_encoder_ctl(audio_enc_, OPUS_SET_DTX(1));
|
opus_encoder_ctl(audio_enc_, OPUS_SET_DTX(1));
|
||||||
// Set complexity to 5
|
SetComplexity(5);
|
||||||
opus_encoder_ctl(audio_enc_, OPUS_SET_COMPLEXITY(5));
|
|
||||||
|
|
||||||
frame_size_ = sample_rate / 1000 * duration_ms;
|
frame_size_ = sample_rate / 1000 * duration_ms;
|
||||||
out_buffer_.resize(sample_rate * channels * sizeof(int16_t));
|
out_buffer_.resize(sample_rate * channels * sizeof(int16_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void OpusEncoder::SetComplexity(int complexity) {
|
||||||
|
if (audio_enc_ != nullptr) {
|
||||||
|
opus_encoder_ctl(audio_enc_, OPUS_SET_COMPLEXITY(complexity));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void OpusEncoder::Encode(const iovec pcm, std::function<void(const iovec opus)> handler) {
|
void OpusEncoder::Encode(const iovec pcm, std::function<void(const iovec opus)> handler) {
|
||||||
if (audio_enc_ == nullptr) {
|
if (audio_enc_ == nullptr) {
|
||||||
ESP_LOGE(TAG, "Audio encoder is not configured");
|
ESP_LOGE(TAG, "Audio encoder is not configured");
|
||||||
@@ -58,3 +63,10 @@ void OpusEncoder::Encode(const iovec pcm, std::function<void(const iovec opus)>
|
|||||||
in_buffer_.erase(in_buffer_.begin(), in_buffer_.begin() + frame_size_);
|
in_buffer_.erase(in_buffer_.begin(), in_buffer_.begin() + frame_size_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void OpusEncoder::ResetState() {
|
||||||
|
if (audio_enc_ != nullptr) {
|
||||||
|
opus_encoder_ctl(audio_enc_, OPUS_RESET_STATE);
|
||||||
|
}
|
||||||
|
in_buffer_.clear();
|
||||||
|
}
|
||||||
|
|||||||
@@ -16,8 +16,10 @@ public:
|
|||||||
~OpusEncoder();
|
~OpusEncoder();
|
||||||
|
|
||||||
void Configure(int sample_rate, int channels, int duration_ms = 60);
|
void Configure(int sample_rate, int channels, int duration_ms = 60);
|
||||||
|
void SetComplexity(int complexity);
|
||||||
void Encode(const iovec pcm, std::function<void(const iovec opus)> handler);
|
void Encode(const iovec pcm, std::function<void(const iovec opus)> handler);
|
||||||
bool IsBufferEmpty() const { return in_buffer_.empty(); }
|
bool IsBufferEmpty() const { return in_buffer_.empty(); }
|
||||||
|
void ResetState();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct OpusEncoder* audio_enc_ = nullptr;
|
struct OpusEncoder* audio_enc_ = nullptr;
|
||||||
|
|||||||
Reference in New Issue
Block a user