Use a separate encoder for the wake word detection packets

This commit is contained in:
Terrence
2024-09-04 13:36:20 +08:00
parent 10574dd2bc
commit bad888e3ec
3 changed files with 30 additions and 12 deletions

View File

@@ -277,10 +277,14 @@ void Application::EncodeWakeWordData() {
}
wake_word_encode_task_ = xTaskCreateStatic([](void* arg) {
Application* app = (Application*)arg;
auto start_time = esp_timer_get_time();
// encode detect packets
for (auto& pcm : app->wake_word_pcm_) {
app->opus_encoder_.Encode(pcm, [app](const iovec opus) {
// append the opus data to the packet
OpusEncoder* encoder = new OpusEncoder();
encoder->Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, 1, 60);
encoder->SetComplexity(2);
for (auto& pcm: app->wake_word_pcm_) {
encoder->Encode(pcm, [app](const iovec opus) {
iovec iov = {
.iov_base = heap_caps_malloc(opus.iov_len, MALLOC_CAP_SPIRAM),
.iov_len = opus.iov_len
@@ -288,10 +292,14 @@ void Application::EncodeWakeWordData() {
memcpy(iov.iov_base, opus.iov_base, opus.iov_len);
app->wake_word_opus_.push_back(iov);
});
free(pcm.iov_base);
heap_caps_free(pcm.iov_base);
}
app->wake_word_pcm_.clear();
auto end_time = esp_timer_get_time();
ESP_LOGI(TAG, "Encode wake word data opus packets: %d in %lld ms", app->wake_word_opus_.size(), (end_time - start_time) / 1000);
xEventGroupSetBits(app->event_group_, DETECT_PACKETS_ENCODED);
delete encoder;
vTaskDelete(NULL);
}, "encode_detect_packets", 4096 * 8, this, 1, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_);
}
@@ -333,7 +341,7 @@ void Application::AudioDetectionTask() {
StartWebSocketClient();
// Here the websocket is done, and we also wait for the wake word data to be encoded
xEventGroupWaitBits(event_group_, DETECT_PACKETS_ENCODED, pdFALSE, pdTRUE, portMAX_DELAY);
xEventGroupWaitBits(event_group_, DETECT_PACKETS_ENCODED, pdTRUE, pdTRUE, portMAX_DELAY);
std::lock_guard<std::recursive_mutex> lock(mutex_);
if (ws_client_ && ws_client_->IsConnected()) {
@@ -341,6 +349,7 @@ void Application::AudioDetectionTask() {
SendWakeWordData();
// Send a ready message to tell the server that the wake word data has been sent
SetChatState(kChatStateWakeWordDetected);
opus_encoder_.ResetState();
// If connected, the hello message is already sent, so we can start communication
xEventGroupSetBits(event_group_, COMMUNICATION_RUNNING);
@@ -405,11 +414,6 @@ void Application::AudioEncodeTask() {
iovec pcm;
xQueueReceive(audio_encode_queue_, &pcm, portMAX_DELAY);
if (pcm.iov_len == 0) {
ESP_LOGE(TAG, "Empty audio data");
continue;
}
// Encode audio data
opus_encoder_.Encode(pcm, [this](const iovec opus) {
std::lock_guard<std::recursive_mutex> lock(mutex_);

View File

@@ -28,13 +28,18 @@ void OpusEncoder::Configure(int sample_rate, int channels, int duration_ms) {
// Set DTX
opus_encoder_ctl(audio_enc_, OPUS_SET_DTX(1));
// Set complexity to 5
opus_encoder_ctl(audio_enc_, OPUS_SET_COMPLEXITY(5));
SetComplexity(5);
frame_size_ = sample_rate / 1000 * duration_ms;
out_buffer_.resize(sample_rate * channels * sizeof(int16_t));
}
/**
 * Forward a complexity setting to the underlying Opus encoder.
 *
 * Does nothing when no encoder instance exists yet (audio_enc_ is null).
 * The value is handed to OPUS_SET_COMPLEXITY as-is; it is not range-checked
 * here and the status returned by opus_encoder_ctl() is discarded.
 *
 * @param complexity Value passed through to OPUS_SET_COMPLEXITY.
 */
void OpusEncoder::SetComplexity(int complexity) {
    // Nothing to tune until an encoder instance has been created.
    if (audio_enc_ == nullptr) {
        return;
    }
    opus_encoder_ctl(audio_enc_, OPUS_SET_COMPLEXITY(complexity));
}
void OpusEncoder::Encode(const iovec pcm, std::function<void(const iovec opus)> handler) {
if (audio_enc_ == nullptr) {
ESP_LOGE(TAG, "Audio encoder is not configured");
@@ -58,3 +63,10 @@ void OpusEncoder::Encode(const iovec pcm, std::function<void(const iovec opus)>
in_buffer_.erase(in_buffer_.begin(), in_buffer_.begin() + frame_size_);
}
}
/**
 * Return the encoder to a clean starting point.
 *
 * Always empties in_buffer_ (presumably PCM samples still waiting to fill a
 * frame; confirm against Encode()). When an encoder instance exists, it
 * additionally issues OPUS_RESET_STATE to the codec.
 */
void OpusEncoder::ResetState() {
    // Discard buffered input whether or not the codec has been created.
    in_buffer_.clear();

    if (audio_enc_ == nullptr) {
        return;
    }
    opus_encoder_ctl(audio_enc_, OPUS_RESET_STATE);
}

View File

@@ -16,8 +16,10 @@ public:
~OpusEncoder();
void Configure(int sample_rate, int channels, int duration_ms = 60);
void SetComplexity(int complexity);
void Encode(const iovec pcm, std::function<void(const iovec opus)> handler);
bool IsBufferEmpty() const { return in_buffer_.empty(); }
void ResetState();
private:
struct OpusEncoder* audio_enc_ = nullptr;