forked from xiaozhi/xiaozhi-esp32
Switch to 2.0 branch (#1152)
* Adapt boards to v2 partition tables
* fix esp log error
* fix display style
* reset emotion after download assets
* fix compiling
* update assets default url
* Add user only tools
* Add image cache
* smaller cache and buffer, more heap
* use MAIN_EVENT_CLOCK_TICK to avoid audio glitches
* bump to 2.0.0
* fix compiling errors

---------

Co-authored-by: Xiaoxia <terrence.huang@tenclass.com>
This commit is contained in:
@@ -5,13 +5,14 @@
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
||||
#include <model_path.h>
|
||||
#include "audio_codec.h"
|
||||
|
||||
class AudioProcessor {
|
||||
public:
|
||||
virtual ~AudioProcessor() = default;
|
||||
|
||||
virtual void Initialize(AudioCodec* codec, int frame_duration_ms) = 0;
|
||||
virtual void Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) = 0;
|
||||
virtual void Feed(std::vector<int16_t>&& data) = 0;
|
||||
virtual void Start() = 0;
|
||||
virtual void Stop() = 0;
|
||||
|
||||
@@ -479,7 +479,7 @@ void AudioService::EnableWakeWordDetection(bool enable) {
|
||||
ESP_LOGD(TAG, "%s wake word detection", enable ? "Enabling" : "Disabling");
|
||||
if (enable) {
|
||||
if (!wake_word_initialized_) {
|
||||
if (!wake_word_->Initialize(codec_)) {
|
||||
if (!wake_word_->Initialize(codec_, models_list_)) {
|
||||
ESP_LOGE(TAG, "Failed to initialize wake word");
|
||||
return;
|
||||
}
|
||||
@@ -497,7 +497,7 @@ void AudioService::EnableVoiceProcessing(bool enable) {
|
||||
ESP_LOGD(TAG, "%s voice processing", enable ? "Enabling" : "Disabling");
|
||||
if (enable) {
|
||||
if (!audio_processor_initialized_) {
|
||||
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS);
|
||||
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS, models_list_);
|
||||
audio_processor_initialized_ = true;
|
||||
}
|
||||
|
||||
@@ -528,7 +528,7 @@ void AudioService::EnableAudioTesting(bool enable) {
|
||||
void AudioService::EnableDeviceAec(bool enable) {
|
||||
ESP_LOGI(TAG, "%s device AEC", enable ? "Enabling" : "Disabling");
|
||||
if (!audio_processor_initialized_) {
|
||||
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS);
|
||||
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS, models_list_);
|
||||
audio_processor_initialized_ = true;
|
||||
}
|
||||
|
||||
@@ -666,4 +666,8 @@ void AudioService::CheckAndUpdateAudioPowerState() {
|
||||
if (!codec_->input_enabled() && !codec_->output_enabled()) {
|
||||
esp_timer_stop(audio_power_timer_);
|
||||
}
|
||||
}
|
||||
|
||||
// Records the model list that this service later hands to
// wake_word_->Initialize() and audio_processor_->Initialize().
// Only the pointer is stored; nothing is copied or validated here.
// models_list_ defaults to nullptr, in which case the Initialize
// implementations visible in this change load the list themselves via
// esp_srmodel_init("model").
// NOTE(review): the pointee presumably has to stay valid for as long as this
// service may initialize its wake word / audio processor — confirm with caller.
void AudioService::SetModelsList(srmodel_list_t* models_list) {
|
||||
models_list_ = models_list;
|
||||
}
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <freertos/task.h>
|
||||
#include <freertos/event_groups.h>
|
||||
#include <esp_timer.h>
|
||||
#include <model_path.h>
|
||||
|
||||
#include <opus_encoder.h>
|
||||
#include <opus_decoder.h>
|
||||
@@ -106,6 +107,7 @@ public:
|
||||
void PlaySound(const std::string_view& sound);
|
||||
bool ReadAudioData(std::vector<int16_t>& data, int sample_rate, int samples);
|
||||
void ResetDecoder();
|
||||
void SetModelsList(srmodel_list_t* models_list);
|
||||
|
||||
private:
|
||||
AudioCodec* codec_ = nullptr;
|
||||
@@ -119,6 +121,7 @@ private:
|
||||
OpusResampler reference_resampler_;
|
||||
OpusResampler output_resampler_;
|
||||
DebugStatistics debug_statistics_;
|
||||
srmodel_list_t* models_list_ = nullptr;
|
||||
|
||||
EventGroupHandle_t event_group_;
|
||||
|
||||
|
||||
@@ -10,7 +10,7 @@ AfeAudioProcessor::AfeAudioProcessor()
|
||||
event_group_ = xEventGroupCreate();
|
||||
}
|
||||
|
||||
void AfeAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) {
|
||||
void AfeAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) {
|
||||
codec_ = codec;
|
||||
frame_samples_ = frame_duration_ms * 16000 / 1000;
|
||||
|
||||
@@ -27,7 +27,13 @@ void AfeAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) {
|
||||
input_format.push_back('R');
|
||||
}
|
||||
|
||||
srmodel_list_t *models = esp_srmodel_init("model");
|
||||
srmodel_list_t *models;
|
||||
if (models_list == nullptr) {
|
||||
models = esp_srmodel_init("model");
|
||||
} else {
|
||||
models = models_list;
|
||||
}
|
||||
|
||||
char* ns_model_name = esp_srmodel_filter(models, ESP_NSNET_PREFIX, NULL);
|
||||
char* vad_model_name = esp_srmodel_filter(models, ESP_VADN_PREFIX, NULL);
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ public:
|
||||
AfeAudioProcessor();
|
||||
~AfeAudioProcessor();
|
||||
|
||||
void Initialize(AudioCodec* codec, int frame_duration_ms) override;
|
||||
void Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) override;
|
||||
void Feed(std::vector<int16_t>&& data) override;
|
||||
void Start() override;
|
||||
void Stop() override;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#define TAG "NoAudioProcessor"
|
||||
|
||||
void NoAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) {
|
||||
// Stores the codec and derives the per-frame sample count from the frame
// duration. The models_list parameter is accepted to match the AudioProcessor
// interface but is not referenced in this body.
void NoAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) {
|
||||
codec_ = codec;
|
||||
// Samples per frame = duration in ms * 16000 / 1000
// (16000 is presumably the sample rate in Hz — TODO confirm).
frame_samples_ = frame_duration_ms * 16000 / 1000;
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ public:
|
||||
NoAudioProcessor() = default;
|
||||
~NoAudioProcessor() = default;
|
||||
|
||||
void Initialize(AudioCodec* codec, int frame_duration_ms) override;
|
||||
void Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) override;
|
||||
void Feed(std::vector<int16_t>&& data) override;
|
||||
void Start() override;
|
||||
void Stop() override;
|
||||
|
||||
@@ -5,13 +5,14 @@
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
|
||||
#include <model_path.h>
|
||||
#include "audio_codec.h"
|
||||
|
||||
class WakeWord {
|
||||
public:
|
||||
virtual ~WakeWord() = default;
|
||||
|
||||
virtual bool Initialize(AudioCodec* codec) = 0;
|
||||
virtual bool Initialize(AudioCodec* codec, srmodel_list_t* models_list) = 0;
|
||||
virtual void Feed(const std::vector<int16_t>& data) = 0;
|
||||
virtual void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) = 0;
|
||||
virtual void Start() = 0;
|
||||
|
||||
@@ -36,11 +36,16 @@ AfeWakeWord::~AfeWakeWord() {
|
||||
vEventGroupDelete(event_group_);
|
||||
}
|
||||
|
||||
bool AfeWakeWord::Initialize(AudioCodec* codec) {
|
||||
bool AfeWakeWord::Initialize(AudioCodec* codec, srmodel_list_t* models_list) {
|
||||
codec_ = codec;
|
||||
int ref_num = codec_->input_reference() ? 1 : 0;
|
||||
|
||||
models_ = esp_srmodel_init("model");
|
||||
if (models_list == nullptr) {
|
||||
models_ = esp_srmodel_init("model");
|
||||
} else {
|
||||
models_ = models_list;
|
||||
}
|
||||
|
||||
if (models_ == nullptr || models_->num == -1) {
|
||||
ESP_LOGE(TAG, "Failed to initialize wakenet model");
|
||||
return false;
|
||||
|
||||
@@ -24,7 +24,7 @@ public:
|
||||
AfeWakeWord();
|
||||
~AfeWakeWord();
|
||||
|
||||
bool Initialize(AudioCodec* codec);
|
||||
bool Initialize(AudioCodec* codec, srmodel_list_t* models_list);
|
||||
void Feed(const std::vector<int16_t>& data);
|
||||
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
|
||||
void Start();
|
||||
|
||||
@@ -34,10 +34,15 @@ CustomWakeWord::~CustomWakeWord() {
|
||||
}
|
||||
}
|
||||
|
||||
bool CustomWakeWord::Initialize(AudioCodec* codec) {
|
||||
bool CustomWakeWord::Initialize(AudioCodec* codec, srmodel_list_t* models_list) {
|
||||
codec_ = codec;
|
||||
|
||||
models_ = esp_srmodel_init("model");
|
||||
if (models_list == nullptr) {
|
||||
models_ = esp_srmodel_init("model");
|
||||
} else {
|
||||
models_ = models_list;
|
||||
}
|
||||
|
||||
if (models_ == nullptr || models_->num == -1) {
|
||||
ESP_LOGE(TAG, "Failed to initialize wakenet model");
|
||||
return false;
|
||||
|
||||
@@ -22,7 +22,7 @@ public:
|
||||
CustomWakeWord();
|
||||
~CustomWakeWord();
|
||||
|
||||
bool Initialize(AudioCodec* codec);
|
||||
bool Initialize(AudioCodec* codec, srmodel_list_t* models_list);
|
||||
void Feed(const std::vector<int16_t>& data);
|
||||
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
|
||||
void Start();
|
||||
|
||||
@@ -14,10 +14,15 @@ EspWakeWord::~EspWakeWord() {
|
||||
}
|
||||
}
|
||||
|
||||
bool EspWakeWord::Initialize(AudioCodec* codec) {
|
||||
bool EspWakeWord::Initialize(AudioCodec* codec, srmodel_list_t* models_list) {
|
||||
codec_ = codec;
|
||||
|
||||
wakenet_model_ = esp_srmodel_init("model");
|
||||
if (models_list == nullptr) {
|
||||
wakenet_model_ = esp_srmodel_init("model");
|
||||
} else {
|
||||
wakenet_model_ = models_list;
|
||||
}
|
||||
|
||||
if (wakenet_model_ == nullptr || wakenet_model_->num == -1) {
|
||||
ESP_LOGE(TAG, "Failed to initialize wakenet model");
|
||||
return false;
|
||||
|
||||
@@ -18,7 +18,7 @@ public:
|
||||
EspWakeWord();
|
||||
~EspWakeWord();
|
||||
|
||||
bool Initialize(AudioCodec* codec);
|
||||
bool Initialize(AudioCodec* codec, srmodel_list_t* models_list);
|
||||
void Feed(const std::vector<int16_t>& data);
|
||||
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
|
||||
void Start();
|
||||
|
||||
Reference in New Issue
Block a user