Switch to 2.0 branch (#1152)

* Adapt boards to v2 partition tables

* fix esp log error

* fix display style

* reset emotion after download assets

* fix compiling

* update assets default url

* Add user only tools

* Add image cache

* smaller cache and buffer, more heap

* use MAIN_EVENT_CLOCK_TICK to avoid audio glitches

* bump to 2.0.0

* fix compiling errors

---------

Co-authored-by: Xiaoxia <terrence.huang@tenclass.com>
This commit is contained in:
Xiaoxia
2025-09-04 15:41:28 +08:00
committed by GitHub
parent 3a3dfc003e
commit 83f6f8c703
196 changed files with 3918 additions and 4902 deletions

View File

@@ -5,13 +5,14 @@
#include <vector>
#include <functional>
#include <model_path.h>
#include "audio_codec.h"
class AudioProcessor {
public:
virtual ~AudioProcessor() = default;
virtual void Initialize(AudioCodec* codec, int frame_duration_ms) = 0;
virtual void Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) = 0;
virtual void Feed(std::vector<int16_t>&& data) = 0;
virtual void Start() = 0;
virtual void Stop() = 0;

View File

@@ -479,7 +479,7 @@ void AudioService::EnableWakeWordDetection(bool enable) {
ESP_LOGD(TAG, "%s wake word detection", enable ? "Enabling" : "Disabling");
if (enable) {
if (!wake_word_initialized_) {
if (!wake_word_->Initialize(codec_)) {
if (!wake_word_->Initialize(codec_, models_list_)) {
ESP_LOGE(TAG, "Failed to initialize wake word");
return;
}
@@ -497,7 +497,7 @@ void AudioService::EnableVoiceProcessing(bool enable) {
ESP_LOGD(TAG, "%s voice processing", enable ? "Enabling" : "Disabling");
if (enable) {
if (!audio_processor_initialized_) {
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS);
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS, models_list_);
audio_processor_initialized_ = true;
}
@@ -528,7 +528,7 @@ void AudioService::EnableAudioTesting(bool enable) {
void AudioService::EnableDeviceAec(bool enable) {
ESP_LOGI(TAG, "%s device AEC", enable ? "Enabling" : "Disabling");
if (!audio_processor_initialized_) {
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS);
audio_processor_->Initialize(codec_, OPUS_FRAME_DURATION_MS, models_list_);
audio_processor_initialized_ = true;
}
@@ -666,4 +666,8 @@ void AudioService::CheckAndUpdateAudioPowerState() {
if (!codec_->input_enabled() && !codec_->output_enabled()) {
esp_timer_stop(audio_power_timer_);
}
}
/**
 * @brief Remember the speech-recognition model list for later initialization.
 *
 * The stored pointer is the one handed to the wake-word and audio-processor
 * Initialize() calls made by this service. Only the pointer is stored here;
 * nothing is copied or released.
 * NOTE(review): the caller presumably keeps the list alive for as long as this
 * service may use it — confirm ownership with the call site.
 *
 * @param models_list Model list to store; may be nullptr.
 */
void AudioService::SetModelsList(srmodel_list_t* models_list) {
    this->models_list_ = models_list;
}

View File

@@ -11,6 +11,7 @@
#include <freertos/task.h>
#include <freertos/event_groups.h>
#include <esp_timer.h>
#include <model_path.h>
#include <opus_encoder.h>
#include <opus_decoder.h>
@@ -106,6 +107,7 @@ public:
void PlaySound(const std::string_view& sound);
bool ReadAudioData(std::vector<int16_t>& data, int sample_rate, int samples);
void ResetDecoder();
void SetModelsList(srmodel_list_t* models_list);
private:
AudioCodec* codec_ = nullptr;
@@ -119,6 +121,7 @@ private:
OpusResampler reference_resampler_;
OpusResampler output_resampler_;
DebugStatistics debug_statistics_;
srmodel_list_t* models_list_ = nullptr;
EventGroupHandle_t event_group_;

View File

@@ -10,7 +10,7 @@ AfeAudioProcessor::AfeAudioProcessor()
event_group_ = xEventGroupCreate();
}
void AfeAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) {
void AfeAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) {
codec_ = codec;
frame_samples_ = frame_duration_ms * 16000 / 1000;
@@ -27,7 +27,13 @@ void AfeAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) {
input_format.push_back('R');
}
srmodel_list_t *models = esp_srmodel_init("model");
srmodel_list_t *models;
if (models_list == nullptr) {
models = esp_srmodel_init("model");
} else {
models = models_list;
}
char* ns_model_name = esp_srmodel_filter(models, ESP_NSNET_PREFIX, NULL);
char* vad_model_name = esp_srmodel_filter(models, ESP_VADN_PREFIX, NULL);

View File

@@ -18,7 +18,7 @@ public:
AfeAudioProcessor();
~AfeAudioProcessor();
void Initialize(AudioCodec* codec, int frame_duration_ms) override;
void Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) override;
void Feed(std::vector<int16_t>&& data) override;
void Start() override;
void Stop() override;

View File

@@ -3,7 +3,7 @@
#define TAG "NoAudioProcessor"
void NoAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms) {
/**
 * @brief Record the codec and derive the per-frame sample count.
 *
 * @param codec             Audio codec stored in codec_.
 * @param frame_duration_ms Frame length in milliseconds.
 * @param models_list       Not referenced by this implementation.
 */
void NoAudioProcessor::Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) {
    // Samples per frame = duration (ms) * 16000 / 1000, evaluated in int
    // arithmetic (presumably a 16 kHz sample rate — confirm against the codec).
    const int samples_per_frame = frame_duration_ms * 16000 / 1000;

    frame_samples_ = samples_per_frame;
    codec_ = codec;
}

View File

@@ -12,7 +12,7 @@ public:
NoAudioProcessor() = default;
~NoAudioProcessor() = default;
void Initialize(AudioCodec* codec, int frame_duration_ms) override;
void Initialize(AudioCodec* codec, int frame_duration_ms, srmodel_list_t* models_list) override;
void Feed(std::vector<int16_t>&& data) override;
void Start() override;
void Stop() override;

View File

@@ -5,13 +5,14 @@
#include <vector>
#include <functional>
#include <model_path.h>
#include "audio_codec.h"
class WakeWord {
public:
virtual ~WakeWord() = default;
virtual bool Initialize(AudioCodec* codec) = 0;
virtual bool Initialize(AudioCodec* codec, srmodel_list_t* models_list) = 0;
virtual void Feed(const std::vector<int16_t>& data) = 0;
virtual void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) = 0;
virtual void Start() = 0;

View File

@@ -36,11 +36,16 @@ AfeWakeWord::~AfeWakeWord() {
vEventGroupDelete(event_group_);
}
bool AfeWakeWord::Initialize(AudioCodec* codec) {
bool AfeWakeWord::Initialize(AudioCodec* codec, srmodel_list_t* models_list) {
codec_ = codec;
int ref_num = codec_->input_reference() ? 1 : 0;
models_ = esp_srmodel_init("model");
if (models_list == nullptr) {
models_ = esp_srmodel_init("model");
} else {
models_ = models_list;
}
if (models_ == nullptr || models_->num == -1) {
ESP_LOGE(TAG, "Failed to initialize wakenet model");
return false;

View File

@@ -24,7 +24,7 @@ public:
AfeWakeWord();
~AfeWakeWord();
bool Initialize(AudioCodec* codec);
bool Initialize(AudioCodec* codec, srmodel_list_t* models_list);
void Feed(const std::vector<int16_t>& data);
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
void Start();

View File

@@ -34,10 +34,15 @@ CustomWakeWord::~CustomWakeWord() {
}
}
bool CustomWakeWord::Initialize(AudioCodec* codec) {
bool CustomWakeWord::Initialize(AudioCodec* codec, srmodel_list_t* models_list) {
codec_ = codec;
models_ = esp_srmodel_init("model");
if (models_list == nullptr) {
models_ = esp_srmodel_init("model");
} else {
models_ = models_list;
}
if (models_ == nullptr || models_->num == -1) {
ESP_LOGE(TAG, "Failed to initialize wakenet model");
return false;

View File

@@ -22,7 +22,7 @@ public:
CustomWakeWord();
~CustomWakeWord();
bool Initialize(AudioCodec* codec);
bool Initialize(AudioCodec* codec, srmodel_list_t* models_list);
void Feed(const std::vector<int16_t>& data);
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
void Start();

View File

@@ -14,10 +14,15 @@ EspWakeWord::~EspWakeWord() {
}
}
bool EspWakeWord::Initialize(AudioCodec* codec) {
bool EspWakeWord::Initialize(AudioCodec* codec, srmodel_list_t* models_list) {
codec_ = codec;
wakenet_model_ = esp_srmodel_init("model");
if (models_list == nullptr) {
wakenet_model_ = esp_srmodel_init("model");
} else {
wakenet_model_ = models_list;
}
if (wakenet_model_ == nullptr || wakenet_model_->num == -1) {
ESP_LOGE(TAG, "Failed to initialize wakenet model");
return false;

View File

@@ -18,7 +18,7 @@ public:
EspWakeWord();
~EspWakeWord();
bool Initialize(AudioCodec* codec);
bool Initialize(AudioCodec* codec, srmodel_list_t* models_list);
void Feed(const std::vector<int16_t>& data);
void OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback);
void Start();