forked from xiaozhi/xiaozhi-esp32
ES8311 + ES7210
This commit is contained in:
@@ -4,7 +4,7 @@
|
||||
# CMakeLists in this exact order for cmake to work correctly
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
set(PROJECT_VER "0.3.3")
|
||||
set(PROJECT_VER "0.4.0")
|
||||
|
||||
include($ENV{IDF_PATH}/tools/cmake/project.cmake)
|
||||
project(xiaozhi)
|
||||
|
||||
@@ -18,25 +18,28 @@
|
||||
|
||||
Application::Application()
|
||||
: boot_button_((gpio_num_t)CONFIG_BOOT_BUTTON_GPIO),
|
||||
volume_up_button_((gpio_num_t)CONFIG_VOLUME_UP_BUTTON_GPIO)
|
||||
#ifdef CONFIG_USE_ML307
|
||||
, ml307_at_modem_(CONFIG_ML307_TX_PIN, CONFIG_ML307_RX_PIN, 4096),
|
||||
http_(ml307_at_modem_),
|
||||
firmware_upgrade_(http_)
|
||||
#else
|
||||
, http_(),
|
||||
firmware_upgrade_(http_)
|
||||
#endif
|
||||
volume_up_button_((gpio_num_t)CONFIG_VOLUME_UP_BUTTON_GPIO),
|
||||
volume_down_button_((gpio_num_t)CONFIG_VOLUME_DOWN_BUTTON_GPIO),
|
||||
#ifdef CONFIG_USE_DISPLAY
|
||||
, display_(CONFIG_DISPLAY_SDA_PIN, CONFIG_DISPLAY_SCL_PIN)
|
||||
display_(CONFIG_DISPLAY_SDA_PIN, CONFIG_DISPLAY_SCL_PIN),
|
||||
#endif
|
||||
#ifdef CONFIG_USE_ML307
|
||||
ml307_at_modem_(CONFIG_ML307_TX_PIN, CONFIG_ML307_RX_PIN, 4096),
|
||||
http_(ml307_at_modem_),
|
||||
#else
|
||||
http_(),
|
||||
#endif
|
||||
firmware_upgrade_(http_)
|
||||
{
|
||||
event_group_ = xEventGroupCreate();
|
||||
|
||||
opus_encoder_.Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, 1);
|
||||
|
||||
opus_encoder_.Configure(16000, 1);
|
||||
opus_decoder_ = opus_decoder_create(opus_decode_sample_rate_, 1, NULL);
|
||||
if (opus_decode_sample_rate_ != CONFIG_AUDIO_OUTPUT_SAMPLE_RATE) {
|
||||
opus_resampler_.Configure(opus_decode_sample_rate_, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);
|
||||
output_resampler_.Configure(CONFIG_AUDIO_OUTPUT_SAMPLE_RATE, opus_decode_sample_rate_);
|
||||
}
|
||||
if (16000 != CONFIG_AUDIO_INPUT_SAMPLE_RATE) {
|
||||
input_resampler_.Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, 16000);
|
||||
}
|
||||
|
||||
firmware_upgrade_.SetCheckVersionUrl(CONFIG_OTA_VERSION_URL);
|
||||
@@ -185,29 +188,49 @@ void Application::Start() {
|
||||
}
|
||||
#endif
|
||||
|
||||
audio_device_.OnInputData([this](const int16_t* data, int size) {
|
||||
audio_device_.Initialize();
|
||||
audio_device_.OnInputData([this](std::vector<int16_t>&& data) {
|
||||
if (16000 != CONFIG_AUDIO_INPUT_SAMPLE_RATE) {
|
||||
if (audio_device_.input_channels() == 2) {
|
||||
auto left_channel = std::vector<int16_t>(data.size() / 2);
|
||||
auto right_channel = std::vector<int16_t>(data.size() / 2);
|
||||
for (size_t i = 0, j = 0; i < left_channel.size(); ++i, j += 2) {
|
||||
left_channel[i] = data[j];
|
||||
right_channel[i] = data[j + 1];
|
||||
}
|
||||
auto resampled_left = std::vector<int16_t>(input_resampler_.GetOutputSamples(left_channel.size()));
|
||||
auto resampled_right = std::vector<int16_t>(input_resampler_.GetOutputSamples(right_channel.size()));
|
||||
input_resampler_.Process(left_channel.data(), left_channel.size(), resampled_left.data());
|
||||
input_resampler_.Process(right_channel.data(), right_channel.size(), resampled_right.data());
|
||||
data.resize(resampled_left.size() + resampled_right.size());
|
||||
for (size_t i = 0, j = 0; i < resampled_left.size(); ++i, j += 2) {
|
||||
data[j] = resampled_left[i];
|
||||
data[j + 1] = resampled_right[i];
|
||||
}
|
||||
} else {
|
||||
auto resampled = std::vector<int16_t>(input_resampler_.GetOutputSamples(data.size()));
|
||||
input_resampler_.Process(data.data(), data.size(), resampled.data());
|
||||
data = std::move(resampled);
|
||||
}
|
||||
}
|
||||
#ifdef CONFIG_USE_AFE_SR
|
||||
if (audio_processor_.IsRunning()) {
|
||||
audio_processor_.Input(data, size);
|
||||
audio_processor_.Input(data);
|
||||
}
|
||||
if (wake_word_detect_.IsDetectionRunning()) {
|
||||
wake_word_detect_.Feed(data, size);
|
||||
wake_word_detect_.Feed(data);
|
||||
}
|
||||
#else
|
||||
std::vector<int16_t> pcm(data, data + size);
|
||||
Schedule([this, pcm = std::move(pcm)]() {
|
||||
Schedule([this, data = std::move(data)]() {
|
||||
if (chat_state_ == kChatStateListening) {
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
audio_encode_queue_.emplace_back(std::move(pcm));
|
||||
audio_encode_queue_.emplace_back(std::move(data));
|
||||
cv_.notify_all();
|
||||
}
|
||||
});
|
||||
#endif
|
||||
});
|
||||
|
||||
// Initialize the audio device
|
||||
audio_device_.Start(CONFIG_AUDIO_INPUT_SAMPLE_RATE, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);
|
||||
|
||||
// OPUS encoder / decoder use a lot of stack memory
|
||||
const size_t opus_stack_size = 4096 * 8;
|
||||
audio_encode_task_stack_ = (StackType_t*)malloc(opus_stack_size);
|
||||
@@ -221,9 +244,10 @@ void Application::Start() {
|
||||
Application* app = (Application*)arg;
|
||||
app->AudioPlayTask();
|
||||
vTaskDelete(NULL);
|
||||
}, "play_audio", 4096 * 4, this, 5, NULL);
|
||||
}, "play_audio", 4096 * 4, this, 4, NULL);
|
||||
|
||||
#ifdef CONFIG_USE_AFE_SR
|
||||
wake_word_detect_.Initialize(audio_device_.input_channels(), audio_device_.input_reference());
|
||||
wake_word_detect_.OnVadStateChange([this](bool speaking) {
|
||||
Schedule([this, speaking]() {
|
||||
auto& builtin_led = BuiltinLed::GetInstance();
|
||||
@@ -272,6 +296,7 @@ void Application::Start() {
|
||||
});
|
||||
wake_word_detect_.StartDetection();
|
||||
|
||||
audio_processor_.Initialize(audio_device_.input_channels(), audio_device_.input_reference());
|
||||
audio_processor_.OnOutput([this](std::vector<int16_t>&& data) {
|
||||
Schedule([this, data = std::move(data)]() {
|
||||
if (chat_state_ == kChatStateListening) {
|
||||
@@ -317,7 +342,7 @@ void Application::Start() {
|
||||
Schedule([this]() {
|
||||
auto volume = audio_device_.output_volume() + 10;
|
||||
if (volume > 100) {
|
||||
volume = 0;
|
||||
volume = 100;
|
||||
}
|
||||
audio_device_.SetOutputVolume(volume);
|
||||
#ifdef CONFIG_USE_DISPLAY
|
||||
@@ -327,6 +352,28 @@ void Application::Start() {
|
||||
});
|
||||
|
||||
volume_up_button_.OnLongPress([this]() {
|
||||
Schedule([this]() {
|
||||
audio_device_.SetOutputVolume(100);
|
||||
#ifdef CONFIG_USE_DISPLAY
|
||||
display_.ShowNotification("Volume\n100");
|
||||
#endif
|
||||
});
|
||||
});
|
||||
|
||||
volume_down_button_.OnClick([this]() {
|
||||
Schedule([this]() {
|
||||
auto volume = audio_device_.output_volume() - 10;
|
||||
if (volume < 0) {
|
||||
volume = 0;
|
||||
}
|
||||
audio_device_.SetOutputVolume(volume);
|
||||
#ifdef CONFIG_USE_DISPLAY
|
||||
display_.ShowNotification("Volume\n" + std::to_string(volume));
|
||||
#endif
|
||||
});
|
||||
});
|
||||
|
||||
volume_down_button_.OnLongPress([this]() {
|
||||
Schedule([this]() {
|
||||
audio_device_.SetOutputVolume(0);
|
||||
#ifdef CONFIG_USE_DISPLAY
|
||||
@@ -449,10 +496,12 @@ BinaryProtocol* Application::AllocateBinaryProtocol(const uint8_t* payload, size
|
||||
|
||||
void Application::AudioEncodeTask() {
|
||||
ESP_LOGI(TAG, "Audio encode task started");
|
||||
const int max_audio_play_queue_size_ = 2;
|
||||
|
||||
while (true) {
|
||||
std::unique_lock<std::mutex> lock(mutex_);
|
||||
cv_.wait(lock, [this]() {
|
||||
return !audio_encode_queue_.empty() || !audio_decode_queue_.empty();
|
||||
return !audio_encode_queue_.empty() || (!audio_decode_queue_.empty() && audio_play_queue_.size() < max_audio_play_queue_size_);
|
||||
});
|
||||
|
||||
if (!audio_encode_queue_.empty()) {
|
||||
@@ -488,9 +537,9 @@ void Application::AudioEncodeTask() {
|
||||
}
|
||||
|
||||
if (opus_decode_sample_rate_ != CONFIG_AUDIO_OUTPUT_SAMPLE_RATE) {
|
||||
int target_size = opus_resampler_.GetOutputSamples(frame_size);
|
||||
int target_size = output_resampler_.GetOutputSamples(frame_size);
|
||||
std::vector<int16_t> resampled(target_size);
|
||||
opus_resampler_.Process(packet->pcm.data(), frame_size, resampled.data());
|
||||
output_resampler_.Process(packet->pcm.data(), frame_size, resampled.data());
|
||||
packet->pcm = std::move(resampled);
|
||||
}
|
||||
|
||||
@@ -513,7 +562,6 @@ void Application::HandleAudioPacket(AudioPacket* packet) {
|
||||
audio_device_.OutputData(packet->pcm);
|
||||
|
||||
if (break_speaking_) {
|
||||
break_speaking_ = false;
|
||||
skip_to_end_ = true;
|
||||
|
||||
// Play a silence and skip to the end
|
||||
@@ -525,12 +573,13 @@ void Application::HandleAudioPacket(AudioPacket* packet) {
|
||||
break;
|
||||
}
|
||||
case kAudioPacketTypeStart:
|
||||
break_speaking_ = false;
|
||||
skip_to_end_ = false;
|
||||
Schedule([this]() {
|
||||
SetChatState(kChatStateSpeaking);
|
||||
});
|
||||
break;
|
||||
case kAudioPacketTypeStop:
|
||||
skip_to_end_ = false;
|
||||
Schedule([this]() {
|
||||
SetChatState(kChatStateListening);
|
||||
});
|
||||
@@ -558,6 +607,7 @@ void Application::AudioPlayTask() {
|
||||
});
|
||||
auto packet = std::move(audio_play_queue_.front());
|
||||
audio_play_queue_.pop_front();
|
||||
cv_.notify_all();
|
||||
lock.unlock();
|
||||
|
||||
HandleAudioPacket(packet);
|
||||
@@ -574,7 +624,7 @@ void Application::SetDecodeSampleRate(int sample_rate) {
|
||||
opus_decoder_ = opus_decoder_create(opus_decode_sample_rate_, 1, NULL);
|
||||
if (opus_decode_sample_rate_ != CONFIG_AUDIO_OUTPUT_SAMPLE_RATE) {
|
||||
ESP_LOGI(TAG, "Resampling audio from %d to %d", opus_decode_sample_rate_, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);
|
||||
opus_resampler_.Configure(opus_decode_sample_rate_, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);
|
||||
output_resampler_.Configure(opus_decode_sample_rate_, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -607,7 +657,7 @@ void Application::StartWebSocketClient() {
|
||||
std::string message = "{";
|
||||
message += "\"type\":\"hello\",";
|
||||
message += "\"audio_params\":{";
|
||||
message += "\"format\":\"opus\", \"sample_rate\":" + std::to_string(CONFIG_AUDIO_INPUT_SAMPLE_RATE) + ", \"channels\":1";
|
||||
message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1";
|
||||
message += "}}";
|
||||
ws_client_->Send(message);
|
||||
});
|
||||
@@ -640,6 +690,10 @@ void Application::StartWebSocketClient() {
|
||||
if (sample_rate != NULL) {
|
||||
SetDecodeSampleRate(sample_rate->valueint);
|
||||
}
|
||||
|
||||
// If the device is speaking, we need to break the speaking
|
||||
break_speaking_ = true;
|
||||
skip_to_end_ = true;
|
||||
} else if (strcmp(state->valuestring, "stop") == 0) {
|
||||
packet->type = kAudioPacketTypeStop;
|
||||
} else if (strcmp(state->valuestring, "sentence_end") == 0) {
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
#ifndef _APPLICATION_H_
|
||||
#define _APPLICATION_H_
|
||||
|
||||
#include "AudioDevice.h"
|
||||
#include <OpusEncoder.h>
|
||||
#include <OpusResampler.h>
|
||||
#include <WebSocket.h>
|
||||
@@ -17,6 +16,7 @@
|
||||
#include <list>
|
||||
#include <condition_variable>
|
||||
|
||||
#include "BoxAudioDevice.h"
|
||||
#include "Display.h"
|
||||
#include "FirmwareUpgrade.h"
|
||||
|
||||
@@ -86,7 +86,15 @@ private:
|
||||
|
||||
Button boot_button_;
|
||||
Button volume_up_button_;
|
||||
Button volume_down_button_;
|
||||
#ifdef CONFIG_AUDIO_CODEC_ES8311_ES7210
|
||||
BoxAudioDevice audio_device_;
|
||||
#else
|
||||
AudioDevice audio_device_;
|
||||
#endif
|
||||
#ifdef CONFIG_USE_DISPLAY
|
||||
Display display_;
|
||||
#endif
|
||||
#ifdef CONFIG_USE_AFE_SR
|
||||
WakeWordDetect wake_word_detect_;
|
||||
AudioProcessor audio_processor_;
|
||||
@@ -98,9 +106,6 @@ private:
|
||||
EspHttp http_;
|
||||
#endif
|
||||
FirmwareUpgrade firmware_upgrade_;
|
||||
#ifdef CONFIG_USE_DISPLAY
|
||||
Display display_;
|
||||
#endif
|
||||
std::mutex mutex_;
|
||||
std::condition_variable_any cv_;
|
||||
std::list<std::function<void()>> main_tasks_;
|
||||
@@ -123,7 +128,8 @@ private:
|
||||
|
||||
int opus_duration_ms_ = 60;
|
||||
int opus_decode_sample_rate_ = CONFIG_AUDIO_OUTPUT_SAMPLE_RATE;
|
||||
OpusResampler opus_resampler_;
|
||||
OpusResampler input_resampler_;
|
||||
OpusResampler output_resampler_;
|
||||
|
||||
TaskHandle_t check_new_version_task_ = nullptr;
|
||||
StaticTask_t check_new_version_task_buffer_;
|
||||
|
||||
@@ -4,7 +4,9 @@
|
||||
#include <cmath>
|
||||
#define TAG "AudioDevice"
|
||||
|
||||
AudioDevice::AudioDevice() {
|
||||
AudioDevice::AudioDevice()
|
||||
: input_sample_rate_(CONFIG_AUDIO_INPUT_SAMPLE_RATE),
|
||||
output_sample_rate_(CONFIG_AUDIO_OUTPUT_SAMPLE_RATE) {
|
||||
}
|
||||
|
||||
AudioDevice::~AudioDevice() {
|
||||
@@ -19,26 +21,16 @@ AudioDevice::~AudioDevice() {
|
||||
}
|
||||
}
|
||||
|
||||
void AudioDevice::Start(int input_sample_rate, int output_sample_rate) {
|
||||
input_sample_rate_ = input_sample_rate;
|
||||
output_sample_rate_ = output_sample_rate;
|
||||
|
||||
#ifdef CONFIG_AUDIO_DEVICE_I2S_SIMPLEX
|
||||
CreateSimplexChannels();
|
||||
void AudioDevice::Initialize() {
|
||||
#ifdef CONFIG_AUDIO_I2S_METHOD_SIMPLEX
|
||||
CreateSimplexChannels();
|
||||
#else
|
||||
CreateDuplexChannels();
|
||||
CreateDuplexChannels();
|
||||
#endif
|
||||
|
||||
ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_));
|
||||
ESP_ERROR_CHECK(i2s_channel_enable(rx_handle_));
|
||||
|
||||
xTaskCreate([](void* arg) {
|
||||
auto audio_device = (AudioDevice*)arg;
|
||||
audio_device->InputTask();
|
||||
}, "audio_input", 4096 * 2, this, 5, &audio_input_task_);
|
||||
}
|
||||
|
||||
void AudioDevice::CreateDuplexChannels() {
|
||||
#ifdef CONFIG_AUDIO_I2S_METHOD_DUPLEX
|
||||
duplex_ = true;
|
||||
|
||||
i2s_chan_config_t chan_cfg = {
|
||||
@@ -73,10 +65,10 @@ void AudioDevice::CreateDuplexChannels() {
|
||||
},
|
||||
.gpio_cfg = {
|
||||
.mclk = I2S_GPIO_UNUSED,
|
||||
.bclk = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_MIC_GPIO_BCLK,
|
||||
.ws = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_MIC_GPIO_WS,
|
||||
.dout = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_SPK_GPIO_DOUT,
|
||||
.din = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_MIC_GPIO_DIN,
|
||||
.bclk = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_BCLK,
|
||||
.ws = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_LRCK,
|
||||
.dout = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_DOUT,
|
||||
.din = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_DIN,
|
||||
.invert_flags = {
|
||||
.mclk_inv = false,
|
||||
.bclk_inv = false,
|
||||
@@ -86,11 +78,14 @@ void AudioDevice::CreateDuplexChannels() {
|
||||
};
|
||||
ESP_ERROR_CHECK(i2s_channel_init_std_mode(tx_handle_, &std_cfg));
|
||||
ESP_ERROR_CHECK(i2s_channel_init_std_mode(rx_handle_, &std_cfg));
|
||||
ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_));
|
||||
ESP_ERROR_CHECK(i2s_channel_enable(rx_handle_));
|
||||
ESP_LOGI(TAG, "Duplex channels created");
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_AUDIO_DEVICE_I2S_SIMPLEX
|
||||
void AudioDevice::CreateSimplexChannels() {
|
||||
#ifdef CONFIG_AUDIO_I2S_METHOD_SIMPLEX
|
||||
// Create a new channel for speaker
|
||||
i2s_chan_config_t chan_cfg = {
|
||||
.id = I2S_NUM_0,
|
||||
@@ -125,7 +120,7 @@ void AudioDevice::CreateSimplexChannels() {
|
||||
.gpio_cfg = {
|
||||
.mclk = I2S_GPIO_UNUSED,
|
||||
.bclk = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_SPK_GPIO_BCLK,
|
||||
.ws = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_SPK_GPIO_WS,
|
||||
.ws = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_SPK_GPIO_LRCK,
|
||||
.dout = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_SPK_GPIO_DOUT,
|
||||
.din = I2S_GPIO_UNUSED,
|
||||
.invert_flags = {
|
||||
@@ -141,16 +136,19 @@ void AudioDevice::CreateSimplexChannels() {
|
||||
chan_cfg.id = I2S_NUM_1;
|
||||
ESP_ERROR_CHECK(i2s_new_channel(&chan_cfg, nullptr, &rx_handle_));
|
||||
std_cfg.clk_cfg.sample_rate_hz = (uint32_t)input_sample_rate_;
|
||||
std_cfg.gpio_cfg.bclk = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_MIC_GPIO_BCLK;
|
||||
std_cfg.gpio_cfg.bclk = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_MIC_GPIO_SCK;
|
||||
std_cfg.gpio_cfg.ws = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_MIC_GPIO_WS;
|
||||
std_cfg.gpio_cfg.dout = I2S_GPIO_UNUSED;
|
||||
std_cfg.gpio_cfg.din = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_MIC_GPIO_DIN;
|
||||
ESP_ERROR_CHECK(i2s_channel_init_std_mode(rx_handle_, &std_cfg));
|
||||
ESP_LOGI(TAG, "Simplex channels created");
|
||||
}
|
||||
#endif
|
||||
|
||||
void AudioDevice::Write(const int16_t* data, int samples) {
|
||||
ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_));
|
||||
ESP_ERROR_CHECK(i2s_channel_enable(rx_handle_));
|
||||
ESP_LOGI(TAG, "Simplex channels created");
|
||||
#endif
|
||||
}
|
||||
|
||||
int AudioDevice::Write(const int16_t* data, int samples) {
|
||||
int32_t buffer[samples];
|
||||
|
||||
// output_volume_: 0-100
|
||||
@@ -162,6 +160,7 @@ void AudioDevice::Write(const int16_t* data, int samples) {
|
||||
|
||||
size_t bytes_written;
|
||||
ESP_ERROR_CHECK(i2s_channel_write(tx_handle_, buffer, samples * sizeof(int32_t), &bytes_written, portMAX_DELAY));
|
||||
return bytes_written / sizeof(int32_t);
|
||||
}
|
||||
|
||||
int AudioDevice::Read(int16_t* dest, int samples) {
|
||||
@@ -181,8 +180,16 @@ int AudioDevice::Read(int16_t* dest, int samples) {
|
||||
return samples;
|
||||
}
|
||||
|
||||
void AudioDevice::OnInputData(std::function<void(const int16_t*, int)> callback) {
|
||||
void AudioDevice::OnInputData(std::function<void(std::vector<int16_t>&& data)> callback) {
|
||||
on_input_data_ = callback;
|
||||
|
||||
// 创建音频输入任务
|
||||
if (audio_input_task_ == nullptr) {
|
||||
xTaskCreate([](void* arg) {
|
||||
auto audio_device = (AudioDevice*)arg;
|
||||
audio_device->InputTask();
|
||||
}, "audio_input", 4096 * 2, this, 3, &audio_input_task_);
|
||||
}
|
||||
}
|
||||
|
||||
void AudioDevice::OutputData(std::vector<int16_t>& data) {
|
||||
@@ -191,12 +198,14 @@ void AudioDevice::OutputData(std::vector<int16_t>& data) {
|
||||
|
||||
void AudioDevice::InputTask() {
|
||||
int duration = 30;
|
||||
int input_frame_size = input_sample_rate_ / 1000 * duration;
|
||||
int16_t input_buffer[input_frame_size];
|
||||
int input_frame_size = input_sample_rate_ / 1000 * duration * input_channels_;
|
||||
while (true) {
|
||||
int samples = Read(input_buffer, input_frame_size);
|
||||
std::vector<int16_t> input_data(input_frame_size);
|
||||
int samples = Read(input_data.data(), input_data.size());
|
||||
if (samples > 0) {
|
||||
on_input_data_(input_buffer, samples);
|
||||
if (on_input_data_) {
|
||||
on_input_data_(std::move(input_data));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
#define _AUDIO_DEVICE_H
|
||||
|
||||
#include <freertos/FreeRTOS.h>
|
||||
#include <freertos/event_groups.h>
|
||||
#include <driver/i2s_std.h>
|
||||
|
||||
#include <vector>
|
||||
@@ -12,35 +11,42 @@
|
||||
class AudioDevice {
|
||||
public:
|
||||
AudioDevice();
|
||||
~AudioDevice();
|
||||
virtual ~AudioDevice();
|
||||
virtual void Initialize();
|
||||
|
||||
void Start(int input_sample_rate, int output_sample_rate);
|
||||
void OnInputData(std::function<void(const int16_t*, int)> callback);
|
||||
void OnInputData(std::function<void(std::vector<int16_t>&& data)> callback);
|
||||
void OutputData(std::vector<int16_t>& data);
|
||||
void SetOutputVolume(int volume);
|
||||
virtual void SetOutputVolume(int volume);
|
||||
|
||||
inline bool duplex() const { return duplex_; }
|
||||
inline bool input_reference() const { return input_reference_; }
|
||||
inline int input_sample_rate() const { return input_sample_rate_; }
|
||||
inline int output_sample_rate() const { return output_sample_rate_; }
|
||||
inline int input_channels() const { return input_channels_; }
|
||||
inline int output_channels() const { return output_channels_; }
|
||||
inline int output_volume() const { return output_volume_; }
|
||||
|
||||
int input_sample_rate() const { return input_sample_rate_; }
|
||||
int output_sample_rate() const { return output_sample_rate_; }
|
||||
bool duplex() const { return duplex_; }
|
||||
int output_volume() const { return output_volume_; }
|
||||
private:
|
||||
TaskHandle_t audio_input_task_ = nullptr;
|
||||
std::function<void(std::vector<int16_t>&& data)> on_input_data_;
|
||||
|
||||
void InputTask();
|
||||
void CreateSimplexChannels();
|
||||
|
||||
protected:
|
||||
bool duplex_ = false;
|
||||
bool input_reference_ = false;
|
||||
int input_sample_rate_ = 0;
|
||||
int output_sample_rate_ = 0;
|
||||
int output_volume_ = 80;
|
||||
int input_channels_ = 1;
|
||||
int output_channels_ = 1;
|
||||
int output_volume_ = 70;
|
||||
i2s_chan_handle_t tx_handle_ = nullptr;
|
||||
i2s_chan_handle_t rx_handle_ = nullptr;
|
||||
|
||||
TaskHandle_t audio_input_task_ = nullptr;
|
||||
|
||||
EventGroupHandle_t event_group_;
|
||||
std::function<void(const int16_t*, int)> on_input_data_;
|
||||
|
||||
void CreateDuplexChannels();
|
||||
void CreateSimplexChannels();
|
||||
void InputTask();
|
||||
int Read(int16_t* dest, int samples);
|
||||
void Write(const int16_t* data, int samples);
|
||||
virtual void CreateDuplexChannels();
|
||||
virtual int Read(int16_t* dest, int samples);
|
||||
virtual int Write(const int16_t* data, int samples);
|
||||
};
|
||||
|
||||
#endif // _AUDIO_DEVICE_H
|
||||
|
||||
@@ -8,6 +8,12 @@ static const char* TAG = "AudioProcessor";
|
||||
AudioProcessor::AudioProcessor()
|
||||
: afe_communication_data_(nullptr) {
|
||||
event_group_ = xEventGroupCreate();
|
||||
}
|
||||
|
||||
void AudioProcessor::Initialize(int channels, bool reference) {
|
||||
channels_ = channels;
|
||||
reference_ = reference;
|
||||
int ref_num = reference_ ? 1 : 0;
|
||||
|
||||
afe_config_t afe_config = {
|
||||
.aec_init = false,
|
||||
@@ -22,17 +28,17 @@ AudioProcessor::AudioProcessor()
|
||||
.wakenet_model_name_2 = NULL,
|
||||
.wakenet_mode = DET_MODE_90,
|
||||
.afe_mode = SR_MODE_HIGH_PERF,
|
||||
.afe_perferred_core = 0,
|
||||
.afe_perferred_priority = 5,
|
||||
.afe_perferred_core = 1,
|
||||
.afe_perferred_priority = 1,
|
||||
.afe_ringbuf_size = 50,
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM,
|
||||
.afe_linear_gain = 1.0,
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2,
|
||||
.pcm_config = {
|
||||
.total_ch_num = 1,
|
||||
.mic_num = 1,
|
||||
.ref_num = 0,
|
||||
.sample_rate = CONFIG_AUDIO_INPUT_SAMPLE_RATE,
|
||||
.total_ch_num = channels_,
|
||||
.mic_num = channels_ - ref_num,
|
||||
.ref_num = ref_num,
|
||||
.sample_rate = 16000,
|
||||
},
|
||||
.debug_init = false,
|
||||
.debug_hook = {{ AFE_DEBUG_HOOK_MASE_TASK_IN, NULL }, { AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL }},
|
||||
@@ -47,7 +53,7 @@ AudioProcessor::AudioProcessor()
|
||||
auto this_ = (AudioProcessor*)arg;
|
||||
this_->AudioProcessorTask();
|
||||
vTaskDelete(NULL);
|
||||
}, "audio_communication", 4096 * 2, this, 5, NULL);
|
||||
}, "audio_communication", 4096 * 2, this, 1, NULL);
|
||||
}
|
||||
|
||||
AudioProcessor::~AudioProcessor() {
|
||||
@@ -57,10 +63,10 @@ AudioProcessor::~AudioProcessor() {
|
||||
vEventGroupDelete(event_group_);
|
||||
}
|
||||
|
||||
void AudioProcessor::Input(const int16_t* data, int size) {
|
||||
input_buffer_.insert(input_buffer_.end(), data, data + size);
|
||||
void AudioProcessor::Input(std::vector<int16_t>& data) {
|
||||
input_buffer_.insert(input_buffer_.end(), data.begin(), data.end());
|
||||
|
||||
auto chunk_size = esp_afe_vc_v1.get_feed_chunksize(afe_communication_data_);
|
||||
auto chunk_size = esp_afe_vc_v1.get_feed_chunksize(afe_communication_data_) * channels_;
|
||||
while (input_buffer_.size() >= chunk_size) {
|
||||
auto chunk = input_buffer_.data();
|
||||
esp_afe_vc_v1.feed(afe_communication_data_, chunk);
|
||||
@@ -92,6 +98,9 @@ void AudioProcessor::AudioProcessorTask() {
|
||||
xEventGroupWaitBits(event_group_, PROCESSOR_RUNNING, pdFALSE, pdTRUE, portMAX_DELAY);
|
||||
|
||||
auto res = esp_afe_vc_v1.fetch(afe_communication_data_);
|
||||
if ((xEventGroupGetBits(event_group_) & PROCESSOR_RUNNING) == 0) {
|
||||
continue;
|
||||
}
|
||||
if (res == nullptr || res->ret_value == ESP_FAIL) {
|
||||
if (res != nullptr) {
|
||||
ESP_LOGI(TAG, "Error code: %d", res->ret_value);
|
||||
|
||||
@@ -15,7 +15,8 @@ public:
|
||||
AudioProcessor();
|
||||
~AudioProcessor();
|
||||
|
||||
void Input(const int16_t* data, int size);
|
||||
void Initialize(int channels, bool reference);
|
||||
void Input(std::vector<int16_t>& data);
|
||||
void Start();
|
||||
void Stop();
|
||||
bool IsRunning();
|
||||
@@ -26,6 +27,8 @@ private:
|
||||
esp_afe_sr_data_t* afe_communication_data_ = nullptr;
|
||||
std::vector<int16_t> input_buffer_;
|
||||
std::function<void(std::vector<int16_t>&& data)> output_callback_;
|
||||
int channels_;
|
||||
bool reference_;
|
||||
|
||||
void AudioProcessorTask();
|
||||
};
|
||||
|
||||
232
main/BoxAudioDevice.cc
Normal file
232
main/BoxAudioDevice.cc
Normal file
@@ -0,0 +1,232 @@
|
||||
#include "BoxAudioDevice.h"
|
||||
#include <esp_log.h>
|
||||
#include <cassert>
|
||||
|
||||
static const char* TAG = "BoxAudioDevice";
|
||||
|
||||
// Constructs the device without touching any hardware. The I2C bus, I2S
// channels and codec devices are all created later, in Initialize().
BoxAudioDevice::BoxAudioDevice() {
|
||||
}
|
||||
|
||||
// Tears down everything Initialize() created: first the two codec devices,
// then the codec/control/GPIO/data interfaces, and finally the I2C master bus.
//
// NOTE(review): none of the handles are checked before use. They default to
// nullptr in the class declaration, so destroying an object on which
// Initialize() was never called passes nullptr to these calls; whether
// ESP_ERROR_CHECK then aborts depends on the library's return codes - confirm.
BoxAudioDevice::~BoxAudioDevice() {
|
||||
// Close and delete the output (playback) device.
ESP_ERROR_CHECK(esp_codec_dev_close(output_dev_));
|
||||
esp_codec_dev_delete(output_dev_);
|
||||
// Close and delete the input (capture) device.
ESP_ERROR_CHECK(esp_codec_dev_close(input_dev_));
|
||||
esp_codec_dev_delete(input_dev_);
|
||||
|
||||
// Release the interfaces the devices were built from.
audio_codec_delete_codec_if(in_codec_if_);
|
||||
audio_codec_delete_ctrl_if(in_ctrl_if_);
|
||||
audio_codec_delete_codec_if(out_codec_if_);
|
||||
audio_codec_delete_ctrl_if(out_ctrl_if_);
|
||||
audio_codec_delete_gpio_if(gpio_if_);
|
||||
audio_codec_delete_data_if(data_if_);
|
||||
|
||||
// The I2C bus is released last, after the control interfaces created on it.
ESP_ERROR_CHECK(i2c_del_master_bus(i2c_master_handle_));
|
||||
}
|
||||
|
||||
// Brings up the whole audio path for the ES8311 (output) + ES7210 (input)
// codec pair:
//   1. record the device capabilities (duplex, reference input, channel count),
//   2. create the I2C master bus used to control both codecs,
//   3. create and enable the I2S channels (CreateDuplexChannels),
//   4. build the shared I2S data interface,
//   5. create, configure and open the output device,
//   6. create, configure and open the input device.
// Every step is checked with assert() or ESP_ERROR_CHECK(); there is no
// recoverable error path.
void BoxAudioDevice::Initialize() {
|
||||
duplex_ = true; // Whether the device is full duplex
|
||||
input_reference_ = CONFIG_AUDIO_CODEC_INPUT_REFERENCE; // Whether to use a reference input, for echo cancellation
|
||||
input_channels_ = input_reference_ ? 2 : 1; // Number of input channels
|
||||
|
||||
// Initialize I2C peripheral
|
||||
i2c_master_bus_config_t i2c_bus_cfg = {
|
||||
.i2c_port = I2C_NUM_0,
|
||||
.sda_io_num = (gpio_num_t)CONFIG_AUDIO_CODEC_I2C_SDA_PIN,
|
||||
.scl_io_num = (gpio_num_t)CONFIG_AUDIO_CODEC_I2C_SCL_PIN,
|
||||
.clk_source = I2C_CLK_SRC_DEFAULT,
|
||||
.glitch_ignore_cnt = 7,
|
||||
.intr_priority = 0,
|
||||
.trans_queue_depth = 0,
|
||||
.flags = {
|
||||
.enable_internal_pullup = 1,
|
||||
},
|
||||
};
|
||||
ESP_ERROR_CHECK(i2c_new_master_bus(&i2c_bus_cfg, &i2c_master_handle_));
|
||||
|
||||
// Creates tx_handle_ / rx_handle_, which the data interface below needs.
CreateDuplexChannels();
|
||||
|
||||
// Do initialize of related interface: data_if, ctrl_if and gpio_if
|
||||
audio_codec_i2s_cfg_t i2s_cfg = {
|
||||
.port = I2S_NUM_0,
|
||||
.rx_handle = rx_handle_,
|
||||
.tx_handle = tx_handle_,
|
||||
};
|
||||
// One data interface is shared by the output and the input device.
data_if_ = audio_codec_new_i2s_data(&i2s_cfg);
|
||||
assert(data_if_ != NULL);
|
||||
|
||||
// Output
|
||||
// Control interface for the ES8311 on the I2C bus created above.
audio_codec_i2c_cfg_t i2c_cfg = {
|
||||
.port = I2C_NUM_0,
|
||||
.addr = ES8311_CODEC_DEFAULT_ADDR,
|
||||
.bus_handle = i2c_master_handle_,
|
||||
};
|
||||
out_ctrl_if_ = audio_codec_new_i2c_ctrl(&i2c_cfg);
|
||||
assert(out_ctrl_if_ != NULL);
|
||||
|
||||
gpio_if_ = audio_codec_new_gpio();
|
||||
assert(gpio_if_ != NULL);
|
||||
|
||||
// ES8311 is used in DAC (playback) mode only.
es8311_codec_cfg_t es8311_cfg = {};
|
||||
es8311_cfg.ctrl_if = out_ctrl_if_;
|
||||
es8311_cfg.gpio_if = gpio_if_;
|
||||
es8311_cfg.codec_mode = ESP_CODEC_DEV_WORK_MODE_DAC;
|
||||
// NOTE(review): pa_pin is presumably the amplifier enable GPIO - confirm.
es8311_cfg.pa_pin = CONFIG_AUDIO_CODEC_PA_PIN;
|
||||
es8311_cfg.use_mclk = true;
|
||||
// NOTE(review): hard-coded voltages, presumably board specific - confirm.
es8311_cfg.hw_gain.pa_voltage = 5.0;
|
||||
es8311_cfg.hw_gain.codec_dac_voltage = 3.3;
|
||||
out_codec_if_ = es8311_codec_new(&es8311_cfg);
|
||||
assert(out_codec_if_ != NULL);
|
||||
|
||||
// dev_cfg is reused for the input device further down.
esp_codec_dev_cfg_t dev_cfg = {
|
||||
.dev_type = ESP_CODEC_DEV_TYPE_OUT,
|
||||
.codec_if = out_codec_if_,
|
||||
.data_if = data_if_,
|
||||
};
|
||||
output_dev_ = esp_codec_dev_new(&dev_cfg);
|
||||
assert(output_dev_ != NULL);
|
||||
|
||||
// Apply the current output_volume_ member to the codec.
ESP_ERROR_CHECK(esp_codec_dev_set_out_vol(output_dev_, output_volume_));
|
||||
|
||||
// Play 16bit 1 channel
|
||||
// fs is reused for the input device further down.
esp_codec_dev_sample_info_t fs = {
|
||||
.bits_per_sample = 16,
|
||||
.channel = 1,
|
||||
.channel_mask = 0,
|
||||
.sample_rate = (uint32_t)output_sample_rate_,
|
||||
.mclk_multiple = 0,
|
||||
};
|
||||
ESP_ERROR_CHECK(esp_codec_dev_open(output_dev_, &fs));
|
||||
|
||||
// Input
|
||||
// Same I2C configuration, retargeted at the ES7210 address.
i2c_cfg.addr = ES7210_CODEC_DEFAULT_ADDR;
|
||||
in_ctrl_if_ = audio_codec_new_i2c_ctrl(&i2c_cfg);
|
||||
assert(in_ctrl_if_ != NULL);
|
||||
|
||||
es7210_codec_cfg_t es7210_cfg = {};
|
||||
es7210_cfg.ctrl_if = in_ctrl_if_;
|
||||
// All four microphone inputs are selected on the codec.
es7210_cfg.mic_selected = ES7120_SEL_MIC1 | ES7120_SEL_MIC2 | ES7120_SEL_MIC3 | ES7120_SEL_MIC4;
|
||||
in_codec_if_ = es7210_codec_new(&es7210_cfg);
|
||||
assert(in_codec_if_ != NULL);
|
||||
|
||||
dev_cfg.dev_type = ESP_CODEC_DEV_TYPE_IN;
|
||||
dev_cfg.codec_if = in_codec_if_;
|
||||
input_dev_ = esp_codec_dev_new(&dev_cfg);
|
||||
assert(input_dev_ != NULL);
|
||||
|
||||
// The input is opened as 4 channels; the mask picks channel 0 only, or
// channels 0 and 1 when a reference input is used (input_channels_ == 2).
// fs.sample_rate is still output_sample_rate_; CreateDuplexChannels()
// asserts that the input and output rates are equal.
fs.channel = 4;
|
||||
if (input_channels_ == 1) {
|
||||
fs.channel_mask = ESP_CODEC_DEV_MAKE_CHANNEL_MASK(0);
|
||||
} else {
|
||||
fs.channel_mask = ESP_CODEC_DEV_MAKE_CHANNEL_MASK(0) | ESP_CODEC_DEV_MAKE_CHANNEL_MASK(1);
|
||||
}
|
||||
ESP_ERROR_CHECK(esp_codec_dev_open(input_dev_, &fs));
|
||||
|
||||
// Gain is set for channel 0 only.
// NOTE(review): 30.0 is presumably in dB - confirm against the library docs.
ESP_ERROR_CHECK(esp_codec_dev_set_in_channel_gain(input_dev_, ESP_CODEC_DEV_MAKE_CHANNEL_MASK(0), 30.0));
|
||||
|
||||
ESP_LOGI(TAG, "BoxAudioDevice initialized");
|
||||
}
|
||||
|
||||
// Creates a TX/RX channel pair on I2S_NUM_0 and enables both.
// The TX channel is initialised in standard mode (16-bit, both slots) and the
// RX channel in TDM mode with four slots enabled. Both configurations name
// the same MCLK/BCLK/LRCK pins; TX drives only DOUT and RX reads only DIN.
// Any driver error aborts through ESP_ERROR_CHECK().
void BoxAudioDevice::CreateDuplexChannels() {
|
||||
// Both channels are created on one port and configured with the same clock
// pins below, so the two sample rates are required to match.
assert(input_sample_rate_ == output_sample_rate_);
|
||||
|
||||
i2s_chan_config_t chan_cfg = {
|
||||
.id = I2S_NUM_0,
|
||||
.role = I2S_ROLE_MASTER,
|
||||
.dma_desc_num = 6,
|
||||
.dma_frame_num = 240,
|
||||
.auto_clear_after_cb = true,
|
||||
.auto_clear_before_cb = false,
|
||||
.intr_priority = 0,
|
||||
};
|
||||
// A single call creates both directions on the same port.
ESP_ERROR_CHECK(i2s_new_channel(&chan_cfg, &tx_handle_, &rx_handle_));
|
||||
|
||||
// TX (playback) configuration: standard mode.
i2s_std_config_t std_cfg = {
|
||||
.clk_cfg = {
|
||||
.sample_rate_hz = (uint32_t)output_sample_rate_,
|
||||
.clk_src = I2S_CLK_SRC_DEFAULT,
|
||||
.ext_clk_freq_hz = 0,
|
||||
.mclk_multiple = I2S_MCLK_MULTIPLE_256
|
||||
},
|
||||
.slot_cfg = {
|
||||
.data_bit_width = I2S_DATA_BIT_WIDTH_16BIT,
|
||||
.slot_bit_width = I2S_SLOT_BIT_WIDTH_AUTO,
|
||||
.slot_mode = I2S_SLOT_MODE_STEREO,
|
||||
.slot_mask = I2S_STD_SLOT_BOTH,
|
||||
.ws_width = I2S_DATA_BIT_WIDTH_16BIT,
|
||||
.ws_pol = false,
|
||||
.bit_shift = true,
|
||||
.left_align = true,
|
||||
.big_endian = false,
|
||||
.bit_order_lsb = false
|
||||
},
|
||||
.gpio_cfg = {
|
||||
.mclk = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_MCLK,
|
||||
.bclk = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_BCLK,
|
||||
.ws = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_LRCK,
|
||||
.dout = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_DOUT,
|
||||
// TX side has no data input.
.din = I2S_GPIO_UNUSED,
|
||||
.invert_flags = {
|
||||
.mclk_inv = false,
|
||||
.bclk_inv = false,
|
||||
.ws_inv = false
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// RX (capture) configuration: TDM mode with slots 0-3 enabled.
i2s_tdm_config_t tdm_cfg = {
|
||||
.clk_cfg = {
|
||||
.sample_rate_hz = (uint32_t)input_sample_rate_,
|
||||
.clk_src = I2S_CLK_SRC_DEFAULT,
|
||||
.ext_clk_freq_hz = 0,
|
||||
.mclk_multiple = I2S_MCLK_MULTIPLE_256,
|
||||
.bclk_div = 8,
|
||||
},
|
||||
.slot_cfg = {
|
||||
.data_bit_width = I2S_DATA_BIT_WIDTH_16BIT,
|
||||
.slot_bit_width = I2S_SLOT_BIT_WIDTH_AUTO,
|
||||
.slot_mode = I2S_SLOT_MODE_STEREO,
|
||||
.slot_mask = i2s_tdm_slot_mask_t(I2S_TDM_SLOT0 | I2S_TDM_SLOT1 | I2S_TDM_SLOT2 | I2S_TDM_SLOT3),
|
||||
.ws_width = I2S_TDM_AUTO_WS_WIDTH,
|
||||
.ws_pol = false,
|
||||
.bit_shift = true,
|
||||
.left_align = false,
|
||||
.big_endian = false,
|
||||
.bit_order_lsb = false,
|
||||
.skip_mask = false,
|
||||
.total_slot = I2S_TDM_AUTO_SLOT_NUM
|
||||
},
|
||||
.gpio_cfg = {
|
||||
.mclk = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_MCLK,
|
||||
.bclk = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_BCLK,
|
||||
.ws = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_LRCK,
|
||||
// RX side has no data output.
.dout = I2S_GPIO_UNUSED,
|
||||
.din = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_DIN,
|
||||
.invert_flags = {
|
||||
.mclk_inv = false,
|
||||
.bclk_inv = false,
|
||||
.ws_inv = false
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Apply the configurations, then enable both channels.
ESP_ERROR_CHECK(i2s_channel_init_std_mode(tx_handle_, &std_cfg));
|
||||
ESP_ERROR_CHECK(i2s_channel_init_tdm_mode(rx_handle_, &tdm_cfg));
|
||||
ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_));
|
||||
ESP_ERROR_CHECK(i2s_channel_enable(rx_handle_));
|
||||
ESP_LOGI(TAG, "Duplex channels created");
|
||||
}
|
||||
|
||||
// Reads `samples` 16-bit samples from the input codec device into `buffer`.
// The request passed to esp_codec_dev_read is expressed in bytes
// (samples * sizeof(int16_t)). Always returns `samples`.
int BoxAudioDevice::Read(int16_t *buffer, int samples) {
|
||||
// NOTE(review): ESP_ERROR_CHECK presumably aborts on a non-OK result, so a
// short or failed read is never reported to the caller - confirm.
ESP_ERROR_CHECK(esp_codec_dev_read(input_dev_, (void*)buffer, samples * sizeof(int16_t)));
|
||||
return samples;
|
||||
}
|
||||
|
||||
// Writes `samples` 16-bit samples from `buffer` to the output codec device.
// The length passed to esp_codec_dev_write is expressed in bytes
// (samples * sizeof(int16_t)). Always returns `samples`.
int BoxAudioDevice::Write(const int16_t *buffer, int samples) {
|
||||
// The C-style cast drops the const qualifier of `buffer` to match the
// void* parameter of the call.
// NOTE(review): presumably the codec layer does not modify the data - confirm.
ESP_ERROR_CHECK(esp_codec_dev_write(output_dev_, (void*)buffer, samples * sizeof(int16_t)));
|
||||
return samples;
|
||||
}
|
||||
|
||||
// Applies `volume` to the output codec device, then forwards the same value
// to the base-class AudioDevice::SetOutputVolume.
void BoxAudioDevice::SetOutputVolume(int volume) {
|
||||
// Hardware is updated first; the base-class call runs only after
// ESP_ERROR_CHECK has evaluated the codec call.
ESP_ERROR_CHECK(esp_codec_dev_set_out_vol(output_dev_, volume));
|
||||
AudioDevice::SetOutputVolume(volume);
|
||||
}
|
||||
36
main/BoxAudioDevice.h
Normal file
36
main/BoxAudioDevice.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef _BOX_AUDIO_DEVICE_H
|
||||
#define _BOX_AUDIO_DEVICE_H
|
||||
|
||||
#include "AudioDevice.h"
|
||||
#include <driver/i2c_master.h>
|
||||
#include <driver/i2s_tdm.h>
|
||||
#include <esp_codec_dev.h>
|
||||
#include <esp_codec_dev_defaults.h>
|
||||
|
||||
|
||||
class BoxAudioDevice : public AudioDevice {
|
||||
public:
|
||||
BoxAudioDevice();
|
||||
virtual ~BoxAudioDevice();
|
||||
void Initialize() override;
|
||||
void SetOutputVolume(int volume) override;
|
||||
|
||||
private:
|
||||
i2c_master_bus_handle_t i2c_master_handle_ = nullptr;
|
||||
|
||||
const audio_codec_data_if_t* data_if_ = nullptr;
|
||||
const audio_codec_ctrl_if_t* out_ctrl_if_ = nullptr;
|
||||
const audio_codec_if_t* out_codec_if_ = nullptr;
|
||||
const audio_codec_ctrl_if_t* in_ctrl_if_ = nullptr;
|
||||
const audio_codec_if_t* in_codec_if_ = nullptr;
|
||||
const audio_codec_gpio_if_t* gpio_if_ = nullptr;
|
||||
|
||||
esp_codec_dev_handle_t output_dev_ = nullptr;
|
||||
esp_codec_dev_handle_t input_dev_ = nullptr;
|
||||
|
||||
void CreateDuplexChannels() override;
|
||||
int Read(int16_t* dest, int samples) override;
|
||||
int Write(const int16_t* data, int samples) override;
|
||||
};
|
||||
|
||||
#endif // _BOX_AUDIO_DEVICE_H
|
||||
@@ -11,6 +11,9 @@ set(SOURCES "AudioDevice.cc"
|
||||
if(CONFIG_USE_AFE_SR)
|
||||
list(APPEND SOURCES "AudioProcessor.cc" "WakeWordDetect.cc")
|
||||
endif()
|
||||
if(CONFIG_AUDIO_CODEC_ES8311_ES7210)
|
||||
list(APPEND SOURCES "BoxAudioDevice.cc")
|
||||
endif()
|
||||
|
||||
idf_component_register(SRCS ${SOURCES}
|
||||
INCLUDE_DIRS "."
|
||||
|
||||
@@ -17,7 +17,7 @@ Display::Display(int sda_pin, int scl_pin) : sda_pin_(sda_pin), scl_pin_(scl_pin
|
||||
ESP_LOGI(TAG, "Display Pins: %d, %d", sda_pin_, scl_pin_);
|
||||
|
||||
i2c_master_bus_config_t bus_config = {
|
||||
.i2c_port = I2C_NUM_0,
|
||||
.i2c_port = I2C_NUM_1,
|
||||
.sda_io_num = (gpio_num_t)sda_pin_,
|
||||
.scl_io_num = (gpio_num_t)scl_pin_,
|
||||
.clk_source = I2C_CLK_SRC_DEFAULT,
|
||||
|
||||
@@ -30,49 +30,136 @@ config AUDIO_OUTPUT_SAMPLE_RATE
|
||||
help
|
||||
Audio output sample rate.
|
||||
|
||||
config AUDIO_DEVICE_I2S_MIC_GPIO_WS
|
||||
int "I2S GPIO WS"
|
||||
default 4
|
||||
choice AUDIO_CODEC
|
||||
prompt "Audio Codec"
|
||||
default AUDIO_CODEC_NONE
|
||||
help
|
||||
GPIO number of the I2S WS.
|
||||
Audio codec.
|
||||
config AUDIO_CODEC_ES8311_ES7210
|
||||
bool "Box: ES8311 + ES7210"
|
||||
config AUDIO_CODEC_NONE
|
||||
bool "None"
|
||||
endchoice
|
||||
|
||||
config AUDIO_DEVICE_I2S_MIC_GPIO_BCLK
|
||||
int "I2S GPIO BCLK"
|
||||
default 5
|
||||
help
|
||||
GPIO number of the I2S BCLK.
|
||||
menu "Box Audio Codec I2C and PA Control"
|
||||
depends on AUDIO_CODEC_ES8311_ES7210
|
||||
|
||||
config AUDIO_CODEC_I2C_SDA_PIN
|
||||
int "Audio Codec I2C SDA Pin"
|
||||
default 39
|
||||
help
|
||||
Audio codec I2C SDA pin.
|
||||
|
||||
config AUDIO_DEVICE_I2S_MIC_GPIO_DIN
|
||||
int "I2S GPIO DIN"
|
||||
default 6
|
||||
help
|
||||
GPIO number of the I2S DIN.
|
||||
config AUDIO_CODEC_I2C_SCL_PIN
|
||||
int "Audio Codec I2C SCL Pin"
|
||||
default 38
|
||||
help
|
||||
Audio codec I2C SCL pin.
|
||||
|
||||
config AUDIO_CODEC_PA_PIN
|
||||
int "Audio Codec PA Pin"
|
||||
default 40
|
||||
help
|
||||
Audio codec PA pin.
|
||||
|
||||
config AUDIO_CODEC_INPUT_REFERENCE
|
||||
bool "Audio Codec Input Reference"
|
||||
default y
|
||||
help
|
||||
Audio codec input reference.
|
||||
endmenu
|
||||
|
||||
config AUDIO_DEVICE_I2S_SPK_GPIO_DOUT
|
||||
int "I2S GPIO DOUT"
|
||||
default 7
|
||||
choice AUDIO_I2S_METHOD
|
||||
prompt "Audio I2S Method"
|
||||
default AUDIO_I2S_METHOD_SIMPLEX if AUDIO_CODEC_NONE
|
||||
default AUDIO_I2S_METHOD_DUPLEX if AUDIO_CODEC_ES8311_ES7210
|
||||
help
|
||||
GPIO number of the I2S DOUT.
|
||||
|
||||
config AUDIO_DEVICE_I2S_SIMPLEX
|
||||
bool "I2S Simplex"
|
||||
default y
|
||||
help
|
||||
Enable I2S Simplex mode.
|
||||
|
||||
config AUDIO_DEVICE_I2S_SPK_GPIO_BCLK
|
||||
int "I2S SPK GPIO BCLK"
|
||||
default 15
|
||||
depends on AUDIO_DEVICE_I2S_SIMPLEX
|
||||
help
|
||||
GPIO number of the I2S MIC BCLK.
|
||||
|
||||
config AUDIO_DEVICE_I2S_SPK_GPIO_WS
|
||||
int "I2S SPK GPIO WS"
|
||||
default 16
|
||||
depends on AUDIO_DEVICE_I2S_SIMPLEX
|
||||
help
|
||||
GPIO number of the I2S MIC WS.
|
||||
Audio I2S method.
|
||||
config AUDIO_I2S_METHOD_SIMPLEX
|
||||
bool "Simplex"
|
||||
help
|
||||
Use I2S 0 as the audio input and I2S 1 as the audio output.
|
||||
config AUDIO_I2S_METHOD_DUPLEX
|
||||
bool "Duplex"
|
||||
help
|
||||
Use I2S 0 as the audio input and audio output.
|
||||
endchoice
|
||||
|
||||
menu "Audio I2S Simplex"
|
||||
depends on AUDIO_I2S_METHOD_SIMPLEX
|
||||
|
||||
config AUDIO_DEVICE_I2S_MIC_GPIO_WS
|
||||
int "I2S MIC GPIO WS"
|
||||
default 4
|
||||
help
|
||||
GPIO number of the I2S MIC WS.
|
||||
|
||||
config AUDIO_DEVICE_I2S_MIC_GPIO_SCK
|
||||
int "I2S MIC GPIO BCLK"
|
||||
default 5
|
||||
help
|
||||
GPIO number of the I2S MIC SCK.
|
||||
|
||||
config AUDIO_DEVICE_I2S_MIC_GPIO_DIN
|
||||
int "I2S MIC GPIO DIN"
|
||||
default 6
|
||||
help
|
||||
GPIO number of the I2S MIC DIN.
|
||||
|
||||
config AUDIO_DEVICE_I2S_SPK_GPIO_DOUT
|
||||
int "I2S SPK GPIO DOUT"
|
||||
default 7
|
||||
help
|
||||
GPIO number of the I2S SPK DOUT.
|
||||
|
||||
config AUDIO_DEVICE_I2S_SPK_GPIO_BCLK
|
||||
int "I2S SPK GPIO BCLK"
|
||||
default 15
|
||||
help
|
||||
GPIO number of the I2S SPK BCLK.
|
||||
|
||||
config AUDIO_DEVICE_I2S_SPK_GPIO_LRCK
|
||||
int "I2S SPK GPIO WS"
|
||||
default 16
|
||||
help
|
||||
GPIO number of the I2S SPK LRCK.
|
||||
|
||||
endmenu
|
||||
|
||||
menu "Audio I2S Duplex"
|
||||
depends on AUDIO_I2S_METHOD_DUPLEX
|
||||
|
||||
config AUDIO_DEVICE_I2S_GPIO_MCLK
|
||||
int "I2S GPIO MCLK"
|
||||
default -1
|
||||
help
|
||||
GPIO number of the I2S MCLK.
|
||||
|
||||
config AUDIO_DEVICE_I2S_GPIO_LRCK
|
||||
int "I2S GPIO LRCK"
|
||||
default 4
|
||||
help
|
||||
GPIO number of the I2S LRCK.
|
||||
|
||||
config AUDIO_DEVICE_I2S_GPIO_BCLK
|
||||
int "I2S GPIO BCLK / SCLK"
|
||||
default 5
|
||||
help
|
||||
GPIO number of the I2S BCLK.
|
||||
|
||||
config AUDIO_DEVICE_I2S_GPIO_DIN
|
||||
int "I2S GPIO DIN"
|
||||
default 6
|
||||
help
|
||||
GPIO number of the I2S DIN.
|
||||
|
||||
config AUDIO_DEVICE_I2S_GPIO_DOUT
|
||||
int "I2S GPIO DOUT"
|
||||
default 7
|
||||
help
|
||||
GPIO number of the I2S DOUT.
|
||||
|
||||
endmenu
|
||||
|
||||
config BOOT_BUTTON_GPIO
|
||||
int "Boot Button GPIO"
|
||||
@@ -86,6 +173,12 @@ config VOLUME_UP_BUTTON_GPIO
|
||||
help
|
||||
GPIO number of the volume up button.
|
||||
|
||||
config VOLUME_DOWN_BUTTON_GPIO
|
||||
int "Volume Down Button GPIO"
|
||||
default 39
|
||||
help
|
||||
GPIO number of the volume down button.
|
||||
|
||||
config USE_AFE_SR
|
||||
bool "Use Espressif AFE SR"
|
||||
default y
|
||||
|
||||
@@ -15,6 +15,24 @@ WakeWordDetect::WakeWordDetect()
|
||||
wake_word_opus_() {
|
||||
|
||||
event_group_ = xEventGroupCreate();
|
||||
}
|
||||
|
||||
// Releases everything the detector owns: the AFE instance (if one was
// created), the encode-task stack buffer, and the event group.
WakeWordDetect::~WakeWordDetect() {
    if (afe_detection_data_) {
        esp_afe_sr_v1.destroy(afe_detection_data_);
    }

    // free() is a no-op on a null pointer, so no explicit guard is required.
    free(wake_word_encode_task_stack_);

    vEventGroupDelete(event_group_);
}
|
||||
|
||||
void WakeWordDetect::Initialize(int channels, bool reference) {
|
||||
channels_ = channels;
|
||||
reference_ = reference;
|
||||
int ref_num = reference_ ? 1 : 0;
|
||||
|
||||
srmodel_list_t *models = esp_srmodel_init("model");
|
||||
for (int i = 0; i < models->num; i++) {
|
||||
@@ -25,7 +43,7 @@ WakeWordDetect::WakeWordDetect()
|
||||
}
|
||||
|
||||
afe_config_t afe_config = {
|
||||
.aec_init = false,
|
||||
.aec_init = reference_,
|
||||
.se_init = true,
|
||||
.vad_init = true,
|
||||
.wakenet_init = true,
|
||||
@@ -37,17 +55,17 @@ WakeWordDetect::WakeWordDetect()
|
||||
.wakenet_model_name_2 = NULL,
|
||||
.wakenet_mode = DET_MODE_90,
|
||||
.afe_mode = SR_MODE_HIGH_PERF,
|
||||
.afe_perferred_core = 0,
|
||||
.afe_perferred_priority = 5,
|
||||
.afe_perferred_core = 1,
|
||||
.afe_perferred_priority = 1,
|
||||
.afe_ringbuf_size = 50,
|
||||
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM,
|
||||
.afe_linear_gain = 1.0,
|
||||
.agc_mode = AFE_MN_PEAK_AGC_MODE_2,
|
||||
.pcm_config = {
|
||||
.total_ch_num = 1,
|
||||
.mic_num = 1,
|
||||
.ref_num = 0,
|
||||
.sample_rate = CONFIG_AUDIO_INPUT_SAMPLE_RATE
|
||||
.total_ch_num = channels_,
|
||||
.mic_num = channels_ - ref_num,
|
||||
.ref_num = ref_num,
|
||||
.sample_rate = 16000
|
||||
},
|
||||
.debug_init = false,
|
||||
.debug_hook = {{ AFE_DEBUG_HOOK_MASE_TASK_IN, NULL }, { AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL }},
|
||||
@@ -62,19 +80,7 @@ WakeWordDetect::WakeWordDetect()
|
||||
auto this_ = (WakeWordDetect*)arg;
|
||||
this_->AudioDetectionTask();
|
||||
vTaskDelete(NULL);
|
||||
}, "audio_detection", 4096 * 2, this, 5, NULL);
|
||||
}
|
||||
|
||||
WakeWordDetect::~WakeWordDetect() {
|
||||
if (afe_detection_data_ != nullptr) {
|
||||
esp_afe_sr_v1.destroy(afe_detection_data_);
|
||||
}
|
||||
|
||||
if (wake_word_encode_task_stack_ != nullptr) {
|
||||
free(wake_word_encode_task_stack_);
|
||||
}
|
||||
|
||||
vEventGroupDelete(event_group_);
|
||||
}, "audio_detection", 4096 * 2, this, 1, NULL);
|
||||
}
|
||||
|
||||
void WakeWordDetect::OnWakeWordDetected(std::function<void()> callback) {
|
||||
@@ -97,10 +103,10 @@ bool WakeWordDetect::IsDetectionRunning() {
|
||||
return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT;
|
||||
}
|
||||
|
||||
void WakeWordDetect::Feed(const int16_t* data, int size) {
|
||||
input_buffer_.insert(input_buffer_.end(), data, data + size);
|
||||
void WakeWordDetect::Feed(std::vector<int16_t>& data) {
|
||||
input_buffer_.insert(input_buffer_.end(), data.begin(), data.end());
|
||||
|
||||
auto chunk_size = esp_afe_sr_v1.get_feed_chunksize(afe_detection_data_);
|
||||
auto chunk_size = esp_afe_sr_v1.get_feed_chunksize(afe_detection_data_) * channels_;
|
||||
while (input_buffer_.size() >= chunk_size) {
|
||||
esp_afe_sr_v1.feed(afe_detection_data_, input_buffer_.data());
|
||||
input_buffer_.erase(input_buffer_.begin(), input_buffer_.begin() + chunk_size);
|
||||
@@ -166,7 +172,7 @@ void WakeWordDetect::EncodeWakeWordData() {
|
||||
auto start_time = esp_timer_get_time();
|
||||
// encode detect packets
|
||||
OpusEncoder* encoder = new OpusEncoder();
|
||||
encoder->Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, 1, 60);
|
||||
encoder->Configure(16000, 1, 60);
|
||||
encoder->SetComplexity(0);
|
||||
this_->wake_word_opus_.resize(4096 * 4);
|
||||
size_t offset = 0;
|
||||
|
||||
@@ -19,7 +19,8 @@ public:
|
||||
WakeWordDetect();
|
||||
~WakeWordDetect();
|
||||
|
||||
void Feed(const int16_t* data, int size);
|
||||
void Initialize(int channels, bool reference);
|
||||
void Feed(std::vector<int16_t>& data);
|
||||
void OnWakeWordDetected(std::function<void()> callback);
|
||||
void OnVadStateChange(std::function<void(bool speaking)> callback);
|
||||
void StartDetection();
|
||||
@@ -36,6 +37,8 @@ private:
|
||||
std::function<void()> wake_word_detected_callback_;
|
||||
std::function<void(bool speaking)> vad_state_change_callback_;
|
||||
bool is_speaking_ = false;
|
||||
int channels_;
|
||||
bool reference_;
|
||||
|
||||
TaskHandle_t wake_word_encode_task_ = nullptr;
|
||||
StaticTask_t wake_word_encode_task_buffer_;
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
## IDF Component Manager Manifest File
|
||||
dependencies:
|
||||
78/esp-builtin-led: "^1.0.2"
|
||||
78/esp-wifi-connect: "^1.1.0"
|
||||
78/esp-wifi-connect: "^1.2.0"
|
||||
78/esp-opus-encoder: "^1.0.2"
|
||||
78/esp-ml307: "^1.2.1"
|
||||
espressif/esp_codec_dev: "^1.3.1"
|
||||
espressif/esp-sr: "^1.9.0"
|
||||
espressif/button: "^3.3.1"
|
||||
lvgl/lvgl: "^8.4.0"
|
||||
|
||||
13
main/main.cc
13
main/main.cc
@@ -13,6 +13,19 @@
|
||||
|
||||
extern "C" void app_main(void)
|
||||
{
|
||||
#ifdef CONFIG_AUDIO_CODEC_ES8311_ES7210
|
||||
// Make GPIO15 HIGH to enable the 4G module
|
||||
gpio_config_t ml307_enable_config = {
|
||||
.pin_bit_mask = (1ULL << 15),
|
||||
.mode = GPIO_MODE_OUTPUT,
|
||||
.pull_up_en = GPIO_PULLUP_DISABLE,
|
||||
.pull_down_en = GPIO_PULLDOWN_DISABLE,
|
||||
.intr_type = GPIO_INTR_DISABLE,
|
||||
};
|
||||
gpio_config(&ml307_enable_config);
|
||||
gpio_set_level(GPIO_NUM_15, 1);
|
||||
#endif
|
||||
|
||||
// Check if the reset button is pressed
|
||||
SystemReset system_reset;
|
||||
system_reset.CheckButtons();
|
||||
|
||||
2667
sdkconfig.box
Normal file
2667
sdkconfig.box
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user