diff --git a/main/Application.cc b/main/Application.cc index 63bf0286..235718cd 100644 --- a/main/Application.cc +++ b/main/Application.cc @@ -7,9 +7,6 @@ #include "silk_resampler.h" #define TAG "application" -#define INPUT_SAMPLE_RATE 16000 -#define DECODE_SAMPLE_RATE 24000 -#define OUTPUT_SAMPLE_RATE 24000 Application::Application() { @@ -27,10 +24,10 @@ Application::Application() { } } - opus_encoder_.Configure(INPUT_SAMPLE_RATE, 1); - opus_decoder_ = opus_decoder_create(DECODE_SAMPLE_RATE, 1, NULL); - if (DECODE_SAMPLE_RATE != OUTPUT_SAMPLE_RATE) { - assert(0 == silk_resampler_init(&resampler_state_, DECODE_SAMPLE_RATE, OUTPUT_SAMPLE_RATE, 1)); + opus_encoder_.Configure(CONFIG_AUDIO_INPUT_SAMPLE_RATE, 1); + opus_decoder_ = opus_decoder_create(opus_decode_sample_rate_, 1, NULL); + if (opus_decode_sample_rate_ != CONFIG_AUDIO_OUTPUT_SAMPLE_RATE) { + assert(0 == silk_resampler_init(&resampler_state_, opus_decode_sample_rate_, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE, 1)); } } @@ -59,7 +56,7 @@ Application::~Application() { } void Application::Start() { - audio_device_.Start(INPUT_SAMPLE_RATE, OUTPUT_SAMPLE_RATE); + audio_device_.Start(CONFIG_AUDIO_INPUT_SAMPLE_RATE, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE); audio_device_.OnStateChanged([this]() { if (audio_device_.playing()) { SetChatState(kChatStateSpeaking); @@ -154,7 +151,7 @@ void Application::StartCommunication() { .total_ch_num = 1, .mic_num = 1, .ref_num = 0, - .sample_rate = INPUT_SAMPLE_RATE + .sample_rate = CONFIG_AUDIO_INPUT_SAMPLE_RATE, }, .debug_init = false, .debug_hook = {{ AFE_DEBUG_HOOK_MASE_TASK_IN, NULL }, { AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL }}, @@ -195,7 +192,7 @@ void Application::StartDetection() { .total_ch_num = 1, .mic_num = 1, .ref_num = 0, - .sample_rate = INPUT_SAMPLE_RATE + .sample_rate = CONFIG_AUDIO_INPUT_SAMPLE_RATE }, .debug_init = false, .debug_hook = {{ AFE_DEBUG_HOOK_MASE_TASK_IN, NULL }, { AFE_DEBUG_HOOK_FETCH_TASK_IN, NULL }}, @@ -335,11 +332,11 @@ void Application::AudioEncodeTask() { } void Application::AudioDecodeTask() { - int frame_size = DECODE_SAMPLE_RATE / 1000 * opus_duration_ms_; - while (true) { AudioPacket* packet; xQueueReceive(audio_decode_queue_, &packet, portMAX_DELAY); + + int frame_size = opus_decode_sample_rate_ / 1000 * opus_duration_ms_; packet->pcm.resize(frame_size); int ret = opus_decode(opus_decoder_, packet->opus.data(), packet->opus.size(), packet->pcm.data(), frame_size, 0); @@ -349,8 +346,8 @@ void Application::AudioDecodeTask() { continue; } - if (DECODE_SAMPLE_RATE != OUTPUT_SAMPLE_RATE) { - int target_size = frame_size * OUTPUT_SAMPLE_RATE / DECODE_SAMPLE_RATE; + if (opus_decode_sample_rate_ != CONFIG_AUDIO_OUTPUT_SAMPLE_RATE) { + int target_size = frame_size * CONFIG_AUDIO_OUTPUT_SAMPLE_RATE / opus_decode_sample_rate_; std::vector resampled(target_size); assert(0 == silk_resampler(&resampler_state_, resampled.data(), packet->pcm.data(), frame_size)); packet->pcm = std::move(resampled); @@ -360,6 +357,19 @@ void Application::AudioDecodeTask() { } } +void Application::SetDecodeSampleRate(int sample_rate) { + if (opus_decode_sample_rate_ == sample_rate) { + return; + } + + opus_decoder_destroy(opus_decoder_); + opus_decode_sample_rate_ = sample_rate; + opus_decoder_ = opus_decoder_create(opus_decode_sample_rate_, 1, NULL); + if (opus_decode_sample_rate_ != CONFIG_AUDIO_OUTPUT_SAMPLE_RATE) { + assert(0 == silk_resampler_init(&resampler_state_, opus_decode_sample_rate_, CONFIG_AUDIO_OUTPUT_SAMPLE_RATE, 1)); + } +} + void Application::StartWebSocketClient() { if (ws_client_ != nullptr) { delete ws_client_; @@ -379,7 +389,7 @@ void Application::StartWebSocketClient() { message += "\"type\":\"hello\", \"version\":\"1.0\","; message += "\"wakeup_model\":\"" + std::string(wakenet_model_) + "\","; message += "\"audio_params\":{"; - message += "\"format\":\"opus\", \"sample_rate\":" + std::to_string(INPUT_SAMPLE_RATE) + ", \"channels\":1"; + message += "\"format\":\"opus\", \"sample_rate\":" + std::to_string(CONFIG_AUDIO_INPUT_SAMPLE_RATE) + ", \"channels\":1"; message += "}}"; ws_client_->Send(message); }); @@ -403,6 +413,10 @@ void Application::StartWebSocketClient() { auto state = cJSON_GetObjectItem(root, "state"); if (strcmp(state->valuestring, "start") == 0) { packet->type = kAudioPacketTypeStart; + auto sample_rate = cJSON_GetObjectItem(root, "sample_rate"); + if (sample_rate != NULL) { + SetDecodeSampleRate(sample_rate->valueint); + } } else if (strcmp(state->valuestring, "stop") == 0) { packet->type = kAudioPacketTypeStop; } else if (strcmp(state->valuestring, "sentence_end") == 0) { diff --git a/main/Application.h b/main/Application.h index 922e0e91..c10a6dc3 100644 --- a/main/Application.h +++ b/main/Application.h @@ -62,8 +62,10 @@ private: OpusDecoder* opus_decoder_ = nullptr; int opus_duration_ms_ = 60; + int opus_decode_sample_rate_ = CONFIG_AUDIO_OUTPUT_SAMPLE_RATE; silk_resampler_state_struct resampler_state_; + void SetDecodeSampleRate(int sample_rate); void SetChatState(ChatState state); void StartDetection(); void StartCommunication(); diff --git a/main/AudioDevice.cc b/main/AudioDevice.cc index 91a97231..eae881cd 100644 --- a/main/AudioDevice.cc +++ b/main/AudioDevice.cc @@ -23,15 +23,14 @@ AudioDevice::~AudioDevice() { } void AudioDevice::Start(int input_sample_rate, int output_sample_rate) { - assert(input_sample_rate == 16000); input_sample_rate_ = input_sample_rate; output_sample_rate_ = output_sample_rate; - if (output_sample_rate == 16000) { - CreateDuplexChannels(); - } else { +#ifdef CONFIG_AUDIO_DEVICE_I2S_SIMPLEX CreateSimplexChannels(); - } +#else + CreateDuplexChannels(); +#endif ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_)); ESP_ERROR_CHECK(i2s_channel_enable(rx_handle_)); @@ -77,10 +76,10 @@ void AudioDevice::CreateDuplexChannels() { }, .gpio_cfg = { .mclk = I2S_GPIO_UNUSED, - .bclk = GPIO_NUM_5, - .ws = GPIO_NUM_4, - .dout = GPIO_NUM_6, - .din = GPIO_NUM_3, + .bclk = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_BCLK, + .ws = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_WS, + .dout = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_DOUT, + .din = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_DIN, .invert_flags = { .mclk_inv = false, .bclk_inv = false, @@ -93,6 +92,7 @@ void AudioDevice::CreateDuplexChannels() { ESP_LOGI(TAG, "Duplex channels created"); } +#ifdef CONFIG_AUDIO_DEVICE_I2S_SIMPLEX void AudioDevice::CreateSimplexChannels() { // Create a new channel for speaker i2s_chan_config_t chan_cfg = { @@ -127,9 +127,9 @@ void AudioDevice::CreateSimplexChannels() { }, .gpio_cfg = { .mclk = I2S_GPIO_UNUSED, - .bclk = GPIO_NUM_5, - .ws = GPIO_NUM_4, - .dout = GPIO_NUM_6, + .bclk = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_BCLK, + .ws = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_WS, + .dout = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_DOUT, .din = I2S_GPIO_UNUSED, .invert_flags = { .mclk_inv = false, @@ -144,13 +144,14 @@ void AudioDevice::CreateSimplexChannels() { chan_cfg.id = I2S_NUM_1; ESP_ERROR_CHECK(i2s_new_channel(&chan_cfg, nullptr, &rx_handle_)); std_cfg.clk_cfg.sample_rate_hz = (uint32_t)input_sample_rate_; - std_cfg.gpio_cfg.bclk = GPIO_NUM_11; - std_cfg.gpio_cfg.ws = GPIO_NUM_10; + std_cfg.gpio_cfg.bclk = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_MIC_GPIO_BCLK; + std_cfg.gpio_cfg.ws = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_MIC_GPIO_WS; std_cfg.gpio_cfg.dout = I2S_GPIO_UNUSED; - std_cfg.gpio_cfg.din = GPIO_NUM_3; + std_cfg.gpio_cfg.din = (gpio_num_t)CONFIG_AUDIO_DEVICE_I2S_GPIO_DIN; ESP_ERROR_CHECK(i2s_channel_init_std_mode(rx_handle_, &std_cfg)); ESP_LOGI(TAG, "Simplex channels created"); } +#endif void AudioDevice::Write(const int16_t* data, int samples) { int32_t buffer[samples]; diff --git a/main/Kconfig.projbuild b/main/Kconfig.projbuild index f1345843..d5e77fc4 100644 --- a/main/Kconfig.projbuild +++ b/main/Kconfig.projbuild @@ -19,4 +19,60 @@ config BUILTIN_LED_GPIO help GPIO number of the builtin LED. +config AUDIO_INPUT_SAMPLE_RATE + int "Audio Input Sample Rate" + default 16000 + help + Audio input sample rate. + +config AUDIO_OUTPUT_SAMPLE_RATE + int "Audio Output Sample Rate" + default 24000 + help + Audio output sample rate. + +config AUDIO_DEVICE_I2S_GPIO_BCLK + int "I2S GPIO BCLK" + default 5 + help + GPIO number of the I2S BCLK. + +config AUDIO_DEVICE_I2S_GPIO_WS + int "I2S GPIO WS" + default 4 + help + GPIO number of the I2S WS. + +config AUDIO_DEVICE_I2S_GPIO_DOUT + int "I2S GPIO DOUT" + default 6 + help + GPIO number of the I2S DOUT. + +config AUDIO_DEVICE_I2S_GPIO_DIN + int "I2S GPIO DIN" + default 3 + help + GPIO number of the I2S DIN. + +config AUDIO_DEVICE_I2S_SIMPLEX + bool "I2S Simplex" + default n + help + Enable I2S Simplex mode. + +config AUDIO_DEVICE_I2S_MIC_GPIO_BCLK + int "I2S MIC GPIO BCLK" + default 11 + depends on AUDIO_DEVICE_I2S_SIMPLEX + help + GPIO number of the I2S MIC BCLK. + +config AUDIO_DEVICE_I2S_MIC_GPIO_WS + int "I2S MIC GPIO WS" + default 10 + depends on AUDIO_DEVICE_I2S_SIMPLEX + help + GPIO number of the I2S MIC WS. + endmenu