diff --git a/CMakeLists.txt b/CMakeLists.txt index f1c5fa1f..272a137b 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ # CMakeLists in this exact order for cmake to work correctly cmake_minimum_required(VERSION 3.16) -set(PROJECT_VER "0.7.2") +set(PROJECT_VER "0.8.0") include($ENV{IDF_PATH}/tools/cmake/project.cmake) project(xiaozhi) diff --git a/main/CMakeLists.txt b/main/CMakeLists.txt index ec389be3..3906cf29 100755 --- a/main/CMakeLists.txt +++ b/main/CMakeLists.txt @@ -8,6 +8,8 @@ set(SOURCES "audio_codec.cc" "board.cc" "boards/wifi_board.cc" "boards/ml307_board.cc" + "protocol.cc" + "protocols/mqtt_protocol.cc" "system_info.cc" "system_reset.cc" "application.cc" @@ -29,6 +31,8 @@ elseif(CONFIG_BOARD_TYPE_KEVIN_BOX_0) set(BOARD_TYPE "kevin-box-0") elseif(CONFIG_BOARD_TYPE_KEVIN_BOX_1) set(BOARD_TYPE "kevin-box-1") +elseif(CONFIG_BOARD_TYPE_KEVIN_BOX_2) + set(BOARD_TYPE "kevin-box-2") elseif(CONFIG_BOARD_TYPE_LICHUANG_DEV) set(BOARD_TYPE "lichuang-dev") endif() diff --git a/main/Kconfig.projbuild b/main/Kconfig.projbuild index 2446d2bb..8e088223 100644 --- a/main/Kconfig.projbuild +++ b/main/Kconfig.projbuild @@ -33,6 +33,8 @@ choice BOARD_TYPE bool "Kevin Box 0" config BOARD_TYPE_KEVIN_BOX_1 bool "Kevin Box 1" + config BOARD_TYPE_KEVIN_BOX_2 + bool "Kevin Box 2" config BOARD_TYPE_LICHUANG_DEV bool "立创开发板" endchoice diff --git a/main/application.cc b/main/application.cc index 0b00ca3f..41f3bd12 100644 --- a/main/application.cc +++ b/main/application.cc @@ -2,6 +2,7 @@ #include "system_info.h" #include "ml307_ssl_transport.h" #include "audio_codec.h" +#include "protocols/mqtt_protocol.h" #include #include @@ -27,8 +28,8 @@ Application::Application() { } Application::~Application() { - if (ws_client_ != nullptr) { - delete ws_client_; + if (protocol_ != nullptr) { + delete protocol_; } if (opus_decoder_ != nullptr) { opus_decoder_destroy(opus_decoder_); @@ -48,10 +49,6 @@ void Application::CheckNewVersion() { ota_.SetPostData(Board::GetInstance().GetJson()); ota_.CheckVersion(); if (ota_.HasNewVersion()) { - // Wait for the chat state to be idle - while (chat_state_ != kChatStateIdle) { - vTaskDelay(100); - } SetChatState(kChatStateUpgrading); ota_.StartUpgrade([](int progress, size_t speed) { char buffer[64]; @@ -84,49 +81,36 @@ void Application::Alert(const std::string&& title, const std::string&& message) void Application::PlayLocalFile(const char* data, size_t size) { ESP_LOGI(TAG, "PlayLocalFile: %zu bytes", size); SetDecodeSampleRate(16000); - auto codec = Board::GetInstance().GetAudioCodec(); - codec->EnableOutput(true); + for (const char* p = data; p < data + size; ) { + auto p3 = (BinaryProtocol3*)p; + p += sizeof(BinaryProtocol3); + + auto payload_size = ntohs(p3->payload_size); + std::string opus; + opus.resize(payload_size); + memcpy(opus.data(), p3->payload, payload_size); + p += payload_size; - { std::lock_guard lock(mutex_); - auto packet = new AudioPacket(); - packet->type = kAudioPacketTypeStart; - audio_decode_queue_.push_back(packet); - } - - ParseBinaryProtocol3(data, size); - - { - std::lock_guard lock(mutex_); - auto packet = new AudioPacket(); - packet->type = kAudioPacketTypeStop; - audio_decode_queue_.push_back(packet); - cv_.notify_all(); + audio_decode_queue_.emplace_back(std::move(opus)); } + cv_.notify_all(); } void Application::ToggleChatState() { Schedule([this]() { if (chat_state_ == kChatStateIdle) { SetChatState(kChatStateConnecting); - StartWebSocketClient(); - - if (ws_client_ && ws_client_->IsConnected()) { + if (protocol_->OpenAudioChannel()) { opus_encoder_.ResetState(); -#ifdef CONFIG_USE_AFE_SR - audio_processor_.Start(); -#endif SetChatState(kChatStateListening); - ESP_LOGI(TAG, "Communication started"); } else { SetChatState(kChatStateIdle); } } else if (chat_state_ == kChatStateSpeaking) { AbortSpeaking(); } else if (chat_state_ == kChatStateListening) { - if (ws_client_ && ws_client_->IsConnected()) { - ws_client_->Close(); - } + protocol_->CloseAudioChannel(); } }); } @@ -139,9 +123,11 @@ void Application::Start() { builtin_led->SetBlue(); builtin_led->StartContinuousBlink(100); + /* Setup the display */ auto display = board.GetDisplay(); display->SetupUI(); + /* Setup the audio codec */ auto codec = board.GetAudioCodec(); opus_decode_sample_rate_ = codec->output_sample_rate(); opus_decoder_ = opus_decoder_create(opus_decode_sample_rate_, 1, NULL); @@ -150,10 +136,6 @@ void Application::Start() { input_resampler_.Configure(codec->input_sample_rate(), 16000); reference_resampler_.Configure(codec->input_sample_rate(), 16000); } - - codec->EnableInput(true); - codec->EnableOutput(true); - codec->EnableOutput(false); codec->OnInputData([this, codec](std::vector&& data) { if (codec->input_sample_rate() != 16000) { if (codec->input_channels() == 2) { @@ -196,8 +178,7 @@ void Application::Start() { #endif }); - // OPUS encoder / decoder use a lot of stack memory - const size_t opus_stack_size = 4096 * 8; + const size_t opus_stack_size = 4096 * 8; // OPUS encoder / decoder use a lot of stack memory audio_encode_task_stack_ = (StackType_t*)heap_caps_malloc(opus_stack_size, MALLOC_CAP_SPIRAM); audio_encode_task_ = xTaskCreateStatic([](void* arg) { Application* app = (Application*)arg; @@ -205,18 +186,12 @@ void Application::Start() { vTaskDelete(NULL); }, "opus_encode", opus_stack_size, this, 1, audio_encode_task_stack_, &audio_encode_task_buffer_); - xTaskCreate([](void* arg) { - Application* app = (Application*)arg; - app->AudioPlayTask(); - vTaskDelete(NULL); - }, "play_audio", 4096 * 4, this, 4, NULL); + codec->Start(); + /* Wait for the network to be ready */ board.StartNetwork(); - // Blink the LED to indicate the device is running - builtin_led->SetGreen(); - builtin_led->BlinkOnce(); - const size_t main_loop_stack_size = 4096 * 2; + const size_t main_loop_stack_size = 4096 * 8; main_loop_task_stack_ = (StackType_t*)heap_caps_malloc(main_loop_stack_size, MALLOC_CAP_SPIRAM); xTaskCreateStatic([](void* arg) { Application* app = (Application*)arg; @@ -224,23 +199,23 @@ void Application::Start() { vTaskDelete(NULL); }, "main_loop", main_loop_stack_size, this, 1, main_loop_task_stack_, &main_loop_task_buffer_); - // Launch a task to check for new firmware version - xTaskCreate([](void* arg) { - Application* app = (Application*)arg; - app->CheckNewVersion(); - vTaskDelete(NULL); - }, "check_new_version", 4096 * 2, this, 1, NULL); + // Check for new firmware version or get the MQTT broker address + while (true) { + CheckNewVersion(); + + if (ota_.HasMqttConfig()) { + break; + } + Alert("Error", "Missing MQTT config"); + vTaskDelay(pdMS_TO_TICKS(10000)); + } #ifdef CONFIG_USE_AFE_SR audio_processor_.Initialize(codec->input_channels(), codec->input_reference()); audio_processor_.OnOutput([this](std::vector&& data) { - Schedule([this, data = std::move(data)]() { - if (chat_state_ == kChatStateListening) { - std::lock_guard lock(mutex_); - audio_encode_queue_.emplace_back(std::move(data)); - cv_.notify_all(); - } - }); + std::lock_guard lock(mutex_); + audio_encode_queue_.emplace_back(std::move(data)); + cv_.notify_all(); }); wake_word_detect_.Initialize(codec->input_channels(), codec->input_reference()); @@ -261,24 +236,18 @@ void Application::Start() { wake_word_detect_.OnWakeWordDetected([this]() { Schedule([this]() { if (chat_state_ == kChatStateIdle) { - // Encode the wake word data and start websocket client at the same time - // They both consume a lot of time (700ms), so we can do them in parallel + SetChatState(kChatStateConnecting); wake_word_detect_.EncodeWakeWordData(); - SetChatState(kChatStateConnecting); - if (ws_client_ == nullptr) { - StartWebSocketClient(); - } - if (ws_client_ && ws_client_->IsConnected()) { - auto encoded = wake_word_detect_.GetWakeWordStream(); - // Send the wake word data to the server - ws_client_->Send(encoded.data(), encoded.size(), true); + if (protocol_->OpenAudioChannel()) { + std::string opus; + // Encode and send the wake word data to the server + while (wake_word_detect_.GetWakeWordOpus(opus)) { + protocol_->SendAudio(opus); + } opus_encoder_.ResetState(); // Send a ready message to indicate the server that the wake word data is sent SetChatState(kChatStateWakeWordDetected); - // If connected, the hello message is already sent, so we can start communication - audio_processor_.Start(); - ESP_LOGI(TAG, "Audio processor started"); } else { SetChatState(kChatStateIdle); } @@ -293,7 +262,68 @@ void Application::Start() { wake_word_detect_.StartDetection(); #endif - chat_state_ = kChatStateIdle; + // Initialize the protocol + display->SetText("Starting\nProtocol..."); + protocol_ = new MqttProtocol(ota_.GetMqttConfig()); + protocol_->OnIncomingAudio([this](const std::string& data) { + std::lock_guard lock(mutex_); + audio_decode_queue_.emplace_back(std::move(data)); + cv_.notify_all(); + }); + protocol_->OnAudioChannelClosed([this]() { + Schedule([this]() { + SetChatState(kChatStateIdle); + }); + }); + protocol_->OnIncomingJson([this](const cJSON* root) { + // Parse JSON data + auto type = cJSON_GetObjectItem(root, "type"); + if (strcmp(type->valuestring, "tts") == 0) { + auto state = cJSON_GetObjectItem(root, "state"); + if (strcmp(state->valuestring, "start") == 0) { + Schedule([this]() { + skip_to_end_ = false; + SetChatState(kChatStateSpeaking); + }); + } else if (strcmp(state->valuestring, "stop") == 0) { + Schedule([this]() { + auto codec = Board::GetInstance().GetAudioCodec(); + codec->WaitForOutputDone(); + SetChatState(kChatStateListening); + }); + } else if (strcmp(state->valuestring, "sentence_start") == 0) { + auto text = cJSON_GetObjectItem(root, "text"); + if (text != NULL) { + ESP_LOGI(TAG, ">> %s", text->valuestring); + } + } + } else if (strcmp(type->valuestring, "stt") == 0) { + auto text = cJSON_GetObjectItem(root, "text"); + if (text != NULL) { + ESP_LOGI(TAG, ">> %s", text->valuestring); + } + } else if (strcmp(type->valuestring, "llm") == 0) { + auto emotion = cJSON_GetObjectItem(root, "emotion"); + if (emotion != NULL) { + ESP_LOGD(TAG, "EMOTION: %s", emotion->valuestring); + } + } else if (strcmp(type->valuestring, "hello") == 0) { + // Get sample rate from hello message + auto audio_params = cJSON_GetObjectItem(root, "audio_params"); + if (audio_params != NULL) { + auto sample_rate = cJSON_GetObjectItem(audio_params, "sample_rate"); + if (sample_rate != NULL) { + SetDecodeSampleRate(sample_rate->valueint); + } + } + } + }); + + // Blink the LED to indicate the device is running + builtin_led->SetGreen(); + builtin_led->BlinkOnce(); + + SetChatState(kChatStateIdle); display->UpdateDisplay(); } @@ -321,16 +351,12 @@ void Application::MainLoop() { void Application::AbortSpeaking() { ESP_LOGI(TAG, "Abort speaking"); - skip_to_end_ = true; + std::string json = "{\"type\":\"abort\"}"; + protocol_->SendText(json); - if (ws_client_ && ws_client_->IsConnected()) { - cJSON* root = cJSON_CreateObject(); - cJSON_AddStringToObject(root, "type", "abort"); - char* json = cJSON_PrintUnformatted(root); - ws_client_->Send(json); - cJSON_Delete(root); - free(json); - } + skip_to_end_ = true; + auto codec = Board::GetInstance().GetAudioCodec(); + codec->ClearOutputQueue(); } void Application::SetChatState(ChatState state) { @@ -359,6 +385,9 @@ void Application::SetChatState(ChatState state) { case kChatStateIdle: builtin_led->TurnOff(); display->SetText("I'm\nIdle."); +#ifdef CONFIG_USE_AFE_SR + audio_processor_.Stop(); +#endif break; case kChatStateConnecting: builtin_led->SetBlue(); @@ -369,11 +398,17 @@ void Application::SetChatState(ChatState state) { builtin_led->SetRed(); builtin_led->TurnOn(); display->SetText("I'm\nListening..."); +#ifdef CONFIG_USE_AFE_SR + audio_processor_.Start(); +#endif break; case kChatStateSpeaking: builtin_led->SetGreen(); builtin_led->TurnOn(); display->SetText("I'm\nSpeaking..."); +#ifdef CONFIG_USE_AFE_SR + audio_processor_.Stop(); +#endif break; case kChatStateWakeWordDetected: builtin_led->SetBlue(); @@ -385,37 +420,20 @@ void Application::SetChatState(ChatState state) { break; } - if (ws_client_ && ws_client_->IsConnected()) { - cJSON* root = cJSON_CreateObject(); - cJSON_AddStringToObject(root, "type", "state"); - cJSON_AddStringToObject(root, "state", state_str[chat_state_]); - char* json = cJSON_PrintUnformatted(root); - ws_client_->Send(json); - cJSON_Delete(root); - free(json); - } -} - -BinaryProtocol3* Application::AllocateBinaryProtocol3(const uint8_t* payload, size_t payload_size) { - auto protocol = (BinaryProtocol3*)heap_caps_malloc(sizeof(BinaryProtocol3) + payload_size, MALLOC_CAP_SPIRAM); - assert(protocol != nullptr); - protocol->type = 0; - protocol->reserved = 0; - protocol->payload_size = htons(payload_size); - assert(sizeof(BinaryProtocol3) == 4UL); - memcpy(protocol->payload, payload, payload_size); - return protocol; + std::string json = "{\"type\":\"state\",\"state\":\""; + json += state_str[chat_state_]; + json += "\"}"; + protocol_->SendText(json); } void Application::AudioEncodeTask() { ESP_LOGI(TAG, "Audio encode task started"); - const int max_audio_play_queue_size_ = 2; // avoid decoding too fast auto codec = Board::GetInstance().GetAudioCodec(); while (true) { std::unique_lock lock(mutex_); cv_.wait(lock, [this]() { - return !audio_encode_queue_.empty() || (!audio_decode_queue_.empty() && audio_play_queue_.size() < max_audio_play_queue_size_); + return !audio_encode_queue_.empty() || !audio_decode_queue_.empty(); }); if (!audio_encode_queue_.empty()) { @@ -423,108 +441,38 @@ void Application::AudioEncodeTask() { audio_encode_queue_.pop_front(); lock.unlock(); - // Encode audio data opus_encoder_.Encode(pcm, [this](const uint8_t* opus, size_t opus_size) { - auto protocol = AllocateBinaryProtocol3(opus, opus_size); - Schedule([this, protocol, opus_size]() { - if (ws_client_ && ws_client_->IsConnected()) { - if (!ws_client_->Send(protocol, sizeof(BinaryProtocol3) + opus_size, true)) { - ESP_LOGE(TAG, "Failed to send audio data"); - } - } - heap_caps_free(protocol); + Schedule([this, data = std::string(reinterpret_cast(opus), opus_size)]() { + protocol_->SendAudio(data); }); }); } else if (!audio_decode_queue_.empty()) { - auto packet = std::move(audio_decode_queue_.front()); + auto opus = std::move(audio_decode_queue_.front()); audio_decode_queue_.pop_front(); lock.unlock(); - if (packet->type == kAudioPacketTypeData && !skip_to_end_) { - int frame_size = opus_decode_sample_rate_ * opus_duration_ms_ / 1000; - packet->pcm.resize(frame_size); - - int ret = opus_decode(opus_decoder_, packet->opus.data(), packet->opus.size(), packet->pcm.data(), frame_size, 0); - if (ret < 0) { - ESP_LOGE(TAG, "Failed to decode audio, error code: %d", ret); - delete packet; - continue; - } - - if (opus_decode_sample_rate_ != codec->output_sample_rate()) { - int target_size = output_resampler_.GetOutputSamples(frame_size); - std::vector resampled(target_size); - output_resampler_.Process(packet->pcm.data(), frame_size, resampled.data()); - packet->pcm = std::move(resampled); - } + if (skip_to_end_) { + continue; } - std::lock_guard lock(mutex_); - audio_play_queue_.push_back(packet); - cv_.notify_all(); - } - } -} + int frame_size = opus_decode_sample_rate_ * opus_duration_ms_ / 1000; + std::vector pcm(frame_size); -void Application::HandleAudioPacket(AudioPacket* packet) { - switch (packet->type) - { - case kAudioPacketTypeData: { - if (skip_to_end_) { - break; - } - - // This will block until the audio device has finished playing the audio - auto codec = Board::GetInstance().GetAudioCodec(); - codec->OutputData(packet->pcm); - break; - } - case kAudioPacketTypeStart: - break_speaking_ = false; - skip_to_end_ = false; - Schedule([this]() { - SetChatState(kChatStateSpeaking); - }); - break; - case kAudioPacketTypeStop: - Schedule([this]() { - if (ws_client_ && ws_client_->IsConnected()) { - SetChatState(kChatStateListening); - } else { - SetChatState(kChatStateIdle); + int ret = opus_decode(opus_decoder_, (const unsigned char*)opus.data(), opus.size(), pcm.data(), frame_size, 0); + if (ret < 0) { + ESP_LOGE(TAG, "Failed to decode audio, error code: %d", ret); + continue; } - }); - break; - case kAudioPacketTypeSentenceStart: - ESP_LOGI(TAG, "<< %s", packet->text.c_str()); - break; - case kAudioPacketTypeSentenceEnd: - if (break_speaking_) { - skip_to_end_ = true; + + // Resample if the sample rate is different + if (opus_decode_sample_rate_ != codec->output_sample_rate()) { + int target_size = output_resampler_.GetOutputSamples(frame_size); + std::vector resampled(target_size); + output_resampler_.Process(pcm.data(), frame_size, resampled.data()); + pcm = std::move(resampled); + } + codec->OutputData(pcm); } - break; - default: - ESP_LOGI(TAG, "Unknown packet type: %d", packet->type); - break; - } - - delete packet; -} - -void Application::AudioPlayTask() { - ESP_LOGI(TAG, "Audio play task started"); - - while (true) { - std::unique_lock lock(mutex_); - cv_.wait(lock, [this]() { - return !audio_play_queue_.empty(); - }); - auto packet = std::move(audio_play_queue_.front()); - audio_play_queue_.pop_front(); - cv_.notify_all(); - lock.unlock(); - - HandleAudioPacket(packet); } } @@ -543,127 +491,3 @@ void Application::SetDecodeSampleRate(int sample_rate) { output_resampler_.Configure(opus_decode_sample_rate_, codec->output_sample_rate()); } } - -void Application::ParseBinaryProtocol3(const char* data, size_t size) { - for (const char* p = data; p < data + size; ) { - auto protocol = (BinaryProtocol3*)p; - p += sizeof(BinaryProtocol3); - - auto packet = new AudioPacket(); - packet->type = kAudioPacketTypeData; - auto payload_size = ntohs(protocol->payload_size); - packet->opus.resize(payload_size); - memcpy(packet->opus.data(), protocol->payload, payload_size); - p += payload_size; - - std::lock_guard lock(mutex_); - audio_decode_queue_.push_back(packet); - } -} - -void Application::StartWebSocketClient() { - if (ws_client_ != nullptr) { - ESP_LOGW(TAG, "WebSocket client already exists"); - delete ws_client_; - } - - std::string url = CONFIG_WEBSOCKET_URL; - std::string token = "Bearer " + std::string(CONFIG_WEBSOCKET_ACCESS_TOKEN); - ws_client_ = Board::GetInstance().CreateWebSocket(); - ws_client_->SetHeader("Authorization", token.c_str()); - ws_client_->SetHeader("Protocol-Version", std::to_string(PROTOCOL_VERSION).c_str()); - ws_client_->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str()); - - ws_client_->OnConnected([this]() { - ESP_LOGI(TAG, "Websocket connected"); - - // Send hello message to describe the client - // keys: message type, version, wakeup_model, audio_params (format, sample_rate, channels) - std::string message = "{"; - message += "\"type\":\"hello\","; - message += "\"audio_params\":{"; - message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1"; - message += "}}"; - ws_client_->Send(message); - }); - - ws_client_->OnData([this](const char* data, size_t len, bool binary) { - if (binary) { - ParseBinaryProtocol3(data, len); - cv_.notify_all(); - } else { - // Parse JSON data - auto root = cJSON_Parse(data); - auto type = cJSON_GetObjectItem(root, "type"); - if (type != NULL) { - if (strcmp(type->valuestring, "tts") == 0) { - auto packet = new AudioPacket(); - auto state = cJSON_GetObjectItem(root, "state"); - if (strcmp(state->valuestring, "start") == 0) { - packet->type = kAudioPacketTypeStart; - auto sample_rate = cJSON_GetObjectItem(root, "sample_rate"); - if (sample_rate != NULL) { - SetDecodeSampleRate(sample_rate->valueint); - } - - // If the device is speaking, we need to skip the last session - skip_to_end_ = true; - } else if (strcmp(state->valuestring, "stop") == 0) { - packet->type = kAudioPacketTypeStop; - } else if (strcmp(state->valuestring, "sentence_end") == 0) { - packet->type = kAudioPacketTypeSentenceEnd; - } else if (strcmp(state->valuestring, "sentence_start") == 0) { - packet->type = kAudioPacketTypeSentenceStart; - packet->text = cJSON_GetObjectItem(root, "text")->valuestring; - } - - std::lock_guard lock(mutex_); - audio_decode_queue_.push_back(packet); - cv_.notify_all(); - } else if (strcmp(type->valuestring, "stt") == 0) { - auto text = cJSON_GetObjectItem(root, "text"); - if (text != NULL) { - ESP_LOGI(TAG, ">> %s", text->valuestring); - } - } else if (strcmp(type->valuestring, "llm") == 0) { - auto emotion = cJSON_GetObjectItem(root, "emotion"); - if (emotion != NULL) { - ESP_LOGD(TAG, "EMOTION: %s", emotion->valuestring); - } - } else { - ESP_LOGW(TAG, "Unknown message type: %s", type->valuestring); - } - } else { - ESP_LOGE(TAG, "Missing message type, data: %s", data); - } - cJSON_Delete(root); - } - }); - - ws_client_->OnError([this](int error) { - ESP_LOGE(TAG, "Websocket error: %d", error); - }); - - ws_client_->OnDisconnected([this]() { - ESP_LOGI(TAG, "Websocket disconnected"); - Schedule([this]() { - auto codec = Board::GetInstance().GetAudioCodec(); - codec->EnableOutput(false); -#ifdef CONFIG_USE_AFE_SR - audio_processor_.Stop(); -#endif - delete ws_client_; - ws_client_ = nullptr; - SetChatState(kChatStateIdle); - }); - }); - - if (!ws_client_->Connect(url.c_str())) { - ESP_LOGE(TAG, "Failed to connect to websocket server"); - return; - } - - // 建立语音通道后打开音频输出,避免待机时喇叭底噪 - auto codec = Board::GetInstance().GetAudioCodec(); - codec->EnableOutput(true); -} diff --git a/main/application.h b/main/application.h index 67e23d46..305b78e8 100644 --- a/main/application.h +++ b/main/application.h @@ -11,8 +11,8 @@ #include "opus_encoder.h" #include "opus_resampler.h" -#include +#include "protocol.h" #include "display.h" #include "board.h" #include "ota.h" @@ -22,10 +22,6 @@ #include "audio_processor.h" #endif -#define DETECTION_RUNNING 1 -#define COMMUNICATION_RUNNING 2 - -#define PROTOCOL_VERSION 3 struct BinaryProtocol3 { uint8_t type; uint8_t reserved; @@ -33,24 +29,6 @@ struct BinaryProtocol3 { uint8_t payload[]; } __attribute__((packed)); -enum AudioPacketType { - kAudioPacketTypeUnkonwn = 0, - kAudioPacketTypeStart, - kAudioPacketTypeStop, - kAudioPacketTypeData, - kAudioPacketTypeSentenceStart, - kAudioPacketTypeSentenceEnd -}; - -struct AudioPacket { - AudioPacketType type = kAudioPacketTypeUnkonwn; - std::string text; - std::vector opus; - std::vector pcm; - uint32_t timestamp; -}; - - enum ChatState { kChatStateUnknown, kChatStateIdle, @@ -91,10 +69,9 @@ private: std::mutex mutex_; std::condition_variable_any cv_; std::list> main_tasks_; - WebSocket* ws_client_ = nullptr; + Protocol* protocol_ = nullptr; EventGroupHandle_t event_group_; volatile ChatState chat_state_ = kChatStateUnknown; - volatile bool break_speaking_ = false; bool skip_to_end_ = false; // Audio encode / decode @@ -102,8 +79,7 @@ private: StaticTask_t audio_encode_task_buffer_; StackType_t* audio_encode_task_stack_ = nullptr; std::list> audio_encode_queue_; - std::list audio_decode_queue_; - std::list audio_play_queue_; + std::list audio_decode_queue_; OpusEncoder opus_encoder_; OpusDecoder* opus_decoder_ = nullptr; @@ -119,15 +95,10 @@ private: StackType_t* main_loop_task_stack_ = nullptr; void MainLoop(); - BinaryProtocol3* AllocateBinaryProtocol3(const uint8_t* payload, size_t payload_size); - void ParseBinaryProtocol3(const char* data, size_t size); void SetDecodeSampleRate(int sample_rate); - void StartWebSocketClient(); void CheckNewVersion(); void AudioEncodeTask(); - void AudioPlayTask(); - void HandleAudioPacket(AudioPacket* packet); void PlayLocalFile(const char* data, size_t size); }; diff --git a/main/audio_codec.cc b/main/audio_codec.cc index 0d087784..b0ea4c10 100644 --- a/main/audio_codec.cc +++ b/main/audio_codec.cc @@ -3,20 +3,53 @@ #include #include +#include #define TAG "AudioCodec" AudioCodec::AudioCodec() { + audio_event_group_ = xEventGroupCreate(); } AudioCodec::~AudioCodec() { if (audio_input_task_ != nullptr) { vTaskDelete(audio_input_task_); } + if (audio_output_task_ != nullptr) { + vTaskDelete(audio_output_task_); + } + if (audio_event_group_ != nullptr) { + vEventGroupDelete(audio_event_group_); + } } void AudioCodec::OnInputData(std::function&& data)> callback) { on_input_data_ = callback; +} + +void AudioCodec::OutputData(std::vector& data) { + std::lock_guard lock(audio_output_queue_mutex_); + audio_output_queue_.emplace_back(std::move(data)); + audio_output_queue_cv_.notify_one(); +} + +IRAM_ATTR bool AudioCodec::on_sent(i2s_chan_handle_t handle, i2s_event_data_t *event, void *user_ctx) { + auto audio_codec = (AudioCodec*)user_ctx; + xEventGroupSetBits(audio_codec->audio_event_group_, AUDIO_EVENT_OUTPUT_DONE); + return false; +} + +void AudioCodec::Start() { + // 注册音频输出回调 + i2s_event_callbacks_t callbacks = {}; + callbacks.on_sent = on_sent; + i2s_channel_register_event_callback(tx_handle_, &callbacks, this); + + ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_)); + ESP_ERROR_CHECK(i2s_channel_enable(rx_handle_)); + + EnableInput(true); + EnableOutput(true); // 创建音频输入任务 if (audio_input_task_ == nullptr) { @@ -25,15 +58,19 @@ void AudioCodec::OnInputData(std::function&& data)> ca audio_device->InputTask(); }, "audio_input", 4096 * 2, this, 3, &audio_input_task_); } -} - -void AudioCodec::OutputData(std::vector& data) { - Write(data.data(), data.size()); + // 创建音频输出任务 + if (audio_output_task_ == nullptr) { + xTaskCreate([](void* arg) { + auto audio_device = (AudioCodec*)arg; + audio_device->OutputTask(); + }, "audio_output", 4096 * 2, this, 3, &audio_output_task_); + } } void AudioCodec::InputTask() { int duration = 30; int input_frame_size = input_sample_rate_ / 1000 * duration * input_channels_; + while (true) { std::vector input_data(input_frame_size); int samples = Read(input_data.data(), input_data.size()); @@ -45,6 +82,45 @@ void AudioCodec::InputTask() { } } +void AudioCodec::OutputTask() { + while (true) { + std::unique_lock lock(audio_output_queue_mutex_); + if (!audio_output_queue_cv_.wait_for(lock, std::chrono::seconds(30), [this]() { + return !audio_output_queue_.empty(); + })) { + // If timeout, disable output + EnableOutput(false); + continue; + } + auto data = std::move(audio_output_queue_.front()); + audio_output_queue_.pop_front(); + lock.unlock(); + + if (!output_enabled_) { + EnableOutput(true); + } + + xEventGroupClearBits(audio_event_group_, AUDIO_EVENT_OUTPUT_DONE); + Write(data.data(), data.size()); + audio_output_queue_cv_.notify_all(); + } +} + +void AudioCodec::WaitForOutputDone() { + // Wait for the output queue to be empty and the output is done + std::unique_lock lock(audio_output_queue_mutex_); + audio_output_queue_cv_.wait(lock, [this]() { + return audio_output_queue_.empty(); + }); + lock.unlock(); + xEventGroupWaitBits(audio_event_group_, AUDIO_EVENT_OUTPUT_DONE, pdFALSE, pdFALSE, portMAX_DELAY); +} + +void AudioCodec::ClearOutputQueue() { + std::lock_guard lock(audio_output_queue_mutex_); + audio_output_queue_.clear(); +} + void AudioCodec::SetOutputVolume(int volume) { output_volume_ = volume; ESP_LOGI(TAG, "Set output volume to %d", output_volume_); diff --git a/main/audio_codec.h b/main/audio_codec.h index 5f5f07b3..b5ac373f 100644 --- a/main/audio_codec.h +++ b/main/audio_codec.h @@ -3,24 +3,32 @@ #include #include +#include +#include #include #include #include +#include +#include +#include #include "board.h" +#define AUDIO_EVENT_OUTPUT_DONE (1 << 0) + class AudioCodec { public: AudioCodec(); virtual ~AudioCodec(); virtual void SetOutputVolume(int volume); - virtual void EnableInput(bool enable); - virtual void EnableOutput(bool enable); + void Start(); void OnInputData(std::function&& data)> callback); void OutputData(std::vector& data); + void WaitForOutputDone(); + void ClearOutputQueue(); inline bool duplex() const { return duplex_; } inline bool input_reference() const { return input_reference_; } @@ -32,11 +40,21 @@ public: private: TaskHandle_t audio_input_task_ = nullptr; + TaskHandle_t audio_output_task_ = nullptr; std::function&& data)> on_input_data_; + std::list> audio_output_queue_; + std::mutex audio_output_queue_mutex_; + std::condition_variable audio_output_queue_cv_; + EventGroupHandle_t audio_event_group_ = nullptr; + IRAM_ATTR static bool on_sent(i2s_chan_handle_t handle, i2s_event_data_t *event, void *user_ctx); void InputTask(); + void OutputTask(); protected: + i2s_chan_handle_t tx_handle_ = nullptr; + i2s_chan_handle_t rx_handle_ = nullptr; + bool duplex_ = false; bool input_reference_ = false; bool input_enabled_ = false; @@ -49,6 +67,8 @@ protected: virtual int Read(int16_t* dest, int samples) = 0; virtual int Write(const int16_t* data, int samples) = 0; + virtual void EnableInput(bool enable); + virtual void EnableOutput(bool enable); }; #endif // _AUDIO_CODEC_H diff --git a/main/audio_codecs/box_audio_codec.cc b/main/audio_codecs/box_audio_codec.cc index bd0a7f1b..906b1179 100644 --- a/main/audio_codecs/box_audio_codec.cc +++ b/main/audio_codecs/box_audio_codec.cc @@ -2,6 +2,7 @@ #include #include +#include static const char TAG[] = "BoxAudioCodec"; @@ -174,8 +175,6 @@ void BoxAudioCodec::CreateDuplexChannels(gpio_num_t mclk, gpio_num_t bclk, gpio_ ESP_ERROR_CHECK(i2s_channel_init_std_mode(tx_handle_, &std_cfg)); ESP_ERROR_CHECK(i2s_channel_init_tdm_mode(rx_handle_, &tdm_cfg)); - ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_)); - ESP_ERROR_CHECK(i2s_channel_enable(rx_handle_)); ESP_LOGI(TAG, "Duplex channels created"); } @@ -200,7 +199,7 @@ void BoxAudioCodec::EnableInput(bool enable) { fs.channel_mask |= ESP_CODEC_DEV_MAKE_CHANNEL_MASK(1); } ESP_ERROR_CHECK(esp_codec_dev_open(input_dev_, &fs)); - ESP_ERROR_CHECK(esp_codec_dev_set_in_channel_gain(input_dev_, ESP_CODEC_DEV_MAKE_CHANNEL_MASK(0), 30.0)); + ESP_ERROR_CHECK(esp_codec_dev_set_in_channel_gain(input_dev_, ESP_CODEC_DEV_MAKE_CHANNEL_MASK(0), 40.0)); } else { ESP_ERROR_CHECK(esp_codec_dev_close(input_dev_)); } diff --git a/main/audio_codecs/box_audio_codec.h b/main/audio_codecs/box_audio_codec.h index 3c793ad4..9ad32834 100644 --- a/main/audio_codecs/box_audio_codec.h +++ b/main/audio_codecs/box_audio_codec.h @@ -3,16 +3,11 @@ #include "audio_codec.h" -#include -#include #include #include class BoxAudioCodec : public AudioCodec { private: - i2s_chan_handle_t tx_handle_ = nullptr; - i2s_chan_handle_t rx_handle_ = nullptr; - const audio_codec_data_if_t* data_if_ = nullptr; const audio_codec_ctrl_if_t* out_ctrl_if_ = nullptr; const audio_codec_if_t* out_codec_if_ = nullptr; @@ -27,6 +22,8 @@ private: virtual int Read(int16_t* dest, int samples) override; virtual int Write(const int16_t* data, int samples) override; + virtual void EnableInput(bool enable) override; + virtual void EnableOutput(bool enable) override; public: BoxAudioCodec(void* i2c_master_handle, int input_sample_rate, int output_sample_rate, @@ -35,8 +32,6 @@ public: virtual ~BoxAudioCodec(); virtual void SetOutputVolume(int volume) override; - virtual void EnableInput(bool enable) override; - virtual void EnableOutput(bool enable) override; }; #endif // _BOX_AUDIO_CODEC_H diff --git a/main/audio_codecs/no_audio_codec.cc b/main/audio_codecs/no_audio_codec.cc index bacb0457..ef12bf50 100644 --- a/main/audio_codecs/no_audio_codec.cc +++ b/main/audio_codecs/no_audio_codec.cc @@ -130,14 +130,11 @@ NoAudioCodec::NoAudioCodec(int input_sample_rate, int output_sample_rate, gpio_n std_cfg.gpio_cfg.dout = I2S_GPIO_UNUSED; std_cfg.gpio_cfg.din = mic_din; ESP_ERROR_CHECK(i2s_channel_init_std_mode(rx_handle_, &std_cfg)); - - ESP_ERROR_CHECK(i2s_channel_enable(tx_handle_)); - ESP_ERROR_CHECK(i2s_channel_enable(rx_handle_)); ESP_LOGI(TAG, "Simplex channels created"); } int NoAudioCodec::Write(const int16_t* data, int samples) { - int32_t buffer[samples]; + std::vector buffer(samples); // output_volume_: 0-100 // volume_factor_: 0-65536 @@ -154,15 +151,15 @@ int NoAudioCodec::Write(const int16_t* data, int samples) { } size_t bytes_written; - ESP_ERROR_CHECK(i2s_channel_write(tx_handle_, buffer, samples * sizeof(int32_t), &bytes_written, portMAX_DELAY)); + ESP_ERROR_CHECK(i2s_channel_write(tx_handle_, buffer.data(), samples * sizeof(int32_t), &bytes_written, portMAX_DELAY)); return bytes_written / sizeof(int32_t); } int NoAudioCodec::Read(int16_t* dest, int samples) { size_t bytes_read; - int32_t bit32_buffer[samples]; - if (i2s_channel_read(rx_handle_, bit32_buffer, samples * sizeof(int32_t), &bytes_read, portMAX_DELAY) != ESP_OK) { + std::vector bit32_buffer(samples); + if (i2s_channel_read(rx_handle_, bit32_buffer.data(), samples * sizeof(int32_t), &bytes_read, portMAX_DELAY) != ESP_OK) { ESP_LOGE(TAG, "Read Failed!"); return 0; } diff --git a/main/audio_codecs/no_audio_codec.h b/main/audio_codecs/no_audio_codec.h index fa69267a..510bb8b0 100644 --- a/main/audio_codecs/no_audio_codec.h +++ b/main/audio_codecs/no_audio_codec.h @@ -3,14 +3,10 @@ #include "audio_codec.h" -#include #include class NoAudioCodec : public AudioCodec { private: - i2s_chan_handle_t tx_handle_ = nullptr; - i2s_chan_handle_t rx_handle_ = nullptr; - virtual int Write(const int16_t* data, int samples) override; virtual int Read(int16_t* dest, int samples) override; diff --git a/main/board.cc b/main/board.cc index 36de3e8a..af9cdbbc 100644 --- a/main/board.cc +++ b/main/board.cc @@ -7,7 +7,7 @@ // static const char *TAG = "Board"; -bool Board::GetBatteryVoltage(int &voltage, bool& charging) { +bool Board::GetBatteryLevel(int &level, bool& charging) { return false; } diff --git a/main/board.h b/main/board.h index 67110465..d9e06725 100644 --- a/main/board.h +++ b/main/board.h @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include "led.h" @@ -36,8 +38,10 @@ public: virtual Display* GetDisplay() = 0; virtual Http* CreateHttp() = 0; virtual WebSocket* CreateWebSocket() = 0; + virtual Mqtt* CreateMqtt() = 0; + virtual Udp* CreateUdp() = 0; virtual bool GetNetworkState(std::string& network_name, int& signal_quality, std::string& signal_quality_text) = 0; - virtual bool GetBatteryVoltage(int &voltage, bool& charging); + virtual bool GetBatteryLevel(int &level, bool& charging); virtual std::string GetJson(); }; diff --git a/main/boards/kevin-box-0/kevin_box_board.cc b/main/boards/kevin-box-0/kevin_box_board.cc index a2cdc751..ebabf08a 100644 --- a/main/boards/kevin-box-0/kevin_box_board.cc +++ b/main/boards/kevin-box-0/kevin_box_board.cc @@ -10,16 +10,11 @@ #include #include #include -#include -#include -#include static const char *TAG = "KevinBoxBoard"; class KevinBoxBoard : public Ml307Board { private: - adc_oneshot_unit_handle_t adc1_handle_; - adc_cali_handle_t adc1_cali_handle_; i2c_master_bus_handle_t display_i2c_bus_; i2c_master_bus_handle_t codec_i2c_bus_; Button boot_button_; @@ -50,27 +45,6 @@ private: gpio_set_level(GPIO_NUM_15, 1); } - void InitializeADC() { - adc_oneshot_unit_init_cfg_t init_config1 = {}; - init_config1.unit_id = ADC_UNIT_1; - ESP_ERROR_CHECK(adc_oneshot_new_unit(&init_config1, &adc1_handle_)); - - //-------------ADC1 Config---------------// - adc_oneshot_chan_cfg_t config = { - .atten = ADC_ATTEN_DB_12, - .bitwidth = ADC_BITWIDTH_DEFAULT, - }; - ESP_ERROR_CHECK(adc_oneshot_config_channel(adc1_handle_, ADC_CHANNEL_0, &config)); - - adc_cali_curve_fitting_config_t cali_config = { - .unit_id = ADC_UNIT_1, - .chan = ADC_CHANNEL_0, - .atten = ADC_ATTEN_DB_12, - .bitwidth = ADC_BITWIDTH_DEFAULT, - }; - ESP_ERROR_CHECK(adc_cali_create_scheme_curve_fitting(&cali_config, &adc1_cali_handle_)); - } - void InitializeDisplayI2c() { i2c_master_bus_config_t bus_config = { .i2c_port = I2C_NUM_0, @@ -153,7 +127,6 @@ public: ESP_LOGI(TAG, "Initializing KevinBoxBoard"); InitializeDisplayI2c(); InitializeCodecI2c(); - InitializeADC(); MountStorage(); Enable4GModule(); @@ -178,14 +151,6 @@ public: static Ssd1306Display display(display_i2c_bus_, DISPLAY_WIDTH, DISPLAY_HEIGHT, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y); return &display; } - - virtual bool GetBatteryVoltage(int &voltage, bool& charging) override { - ESP_ERROR_CHECK(adc_oneshot_get_calibrated_result(adc1_handle_, adc1_cali_handle_, ADC_CHANNEL_0, &voltage)); - voltage *= 3; - charging = false; - ESP_LOGI(TAG, "Battery voltage: %d, Charging: %d", voltage, charging); - return true; - } }; DECLARE_BOARD(KevinBoxBoard); \ No newline at end of file diff --git a/main/boards/kevin-box-1/kevin_box_board.cc b/main/boards/kevin-box-1/kevin_box_board.cc index 03b386ae..bfb83dbf 100644 --- a/main/boards/kevin-box-1/kevin_box_board.cc +++ b/main/boards/kevin-box-1/kevin_box_board.cc @@ -10,16 +10,11 @@ #include #include #include -#include -#include -#include static const char *TAG = "KevinBoxBoard"; class KevinBoxBoard : public Ml307Board { private: - adc_oneshot_unit_handle_t adc1_handle_; - adc_cali_handle_t adc1_cali_handle_; i2c_master_bus_handle_t display_i2c_bus_; i2c_master_bus_handle_t codec_i2c_bus_; Button boot_button_; @@ -51,27 +46,6 @@ private: gpio_set_level(GPIO_NUM_18, 1); } - void InitializeADC() { - adc_oneshot_unit_init_cfg_t init_config1 = {}; - init_config1.unit_id = ADC_UNIT_1; - ESP_ERROR_CHECK(adc_oneshot_new_unit(&init_config1, &adc1_handle_)); - - //-------------ADC1 Config---------------// - adc_oneshot_chan_cfg_t config = { - .atten = ADC_ATTEN_DB_12, - .bitwidth = ADC_BITWIDTH_DEFAULT, - }; - ESP_ERROR_CHECK(adc_oneshot_config_channel(adc1_handle_, ADC_CHANNEL_0, &config)); - - adc_cali_curve_fitting_config_t cali_config = { - .unit_id = ADC_UNIT_1, - .chan = ADC_CHANNEL_0, - .atten = ADC_ATTEN_DB_12, - .bitwidth = ADC_BITWIDTH_DEFAULT, - }; - ESP_ERROR_CHECK(adc_cali_create_scheme_curve_fitting(&cali_config, &adc1_cali_handle_)); - } - void InitializeDisplayI2c() { i2c_master_bus_config_t bus_config = { .i2c_port = I2C_NUM_0, @@ -154,19 +128,9 @@ public: ESP_LOGI(TAG, "Initializing KevinBoxBoard"); InitializeDisplayI2c(); InitializeCodecI2c(); - InitializeADC(); MountStorage(); Enable4GModule(); - gpio_config_t charging_io = { - .pin_bit_mask = (1ULL << 2), - .mode = GPIO_MODE_INPUT, - .pull_up_en = GPIO_PULLUP_ENABLE, - .pull_down_en = GPIO_PULLDOWN_DISABLE, - .intr_type = GPIO_INTR_DISABLE, - }; - gpio_config(&charging_io); - InitializeButtons(); Ml307Board::Initialize(); @@ -188,14 +152,6 @@ public: static Ssd1306Display display(display_i2c_bus_, DISPLAY_WIDTH, DISPLAY_HEIGHT, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y); return &display; } - - virtual bool GetBatteryVoltage(int &voltage, bool& charging) override { - ESP_ERROR_CHECK(adc_oneshot_get_calibrated_result(adc1_handle_, adc1_cali_handle_, ADC_CHANNEL_0, &voltage)); - voltage *= 3; - charging = gpio_get_level(GPIO_NUM_2) == 0; - ESP_LOGI(TAG, "Battery voltage: %d, Charging: %d", voltage, charging); - return true; - } }; DECLARE_BOARD(KevinBoxBoard); \ No newline at end of file diff --git a/main/boards/kevin-box-2/axp2101.cc b/main/boards/kevin-box-2/axp2101.cc new file mode 100644 index 00000000..6f307a72 --- /dev/null +++ b/main/boards/kevin-box-2/axp2101.cc @@ -0,0 +1,84 @@ +#include "axp2101.h" + +#include + +static const char *TAG = "AXP2101"; + +bool Axp2101::Initialize(i2c_master_bus_handle_t i2c_bus, int i2c_device_address) { + i2c_device_config_t axp2101_cfg = { + .dev_addr_length = I2C_ADDR_BIT_LEN_7, + .device_address = (uint16_t)i2c_device_address, + .scl_speed_hz = 100000, + .scl_wait_us = 0, + .flags = { + .disable_ack_check = 0, + }, + }; + ESP_ERROR_CHECK(i2c_master_bus_add_device(i2c_bus, &axp2101_cfg, &i2c_device_)); + + WriteReg(0x93, 0x1c); // 配置aldo2输出为3.3v + + uint8_t value = ReadReg(0x90); // XPOWERS_AXP2101_LDO_ONOFF_CTRL0 + value = value | 0x02; // set bit 1 (ALDO2) + WriteReg(0x90, value); // and power channels now enabled + + WriteReg(0x64, 0x03); // CV charger voltage setting to 42V + value = ReadReg(0x62); + ESP_LOGI(TAG, "axp2101 read 0x62 get: 0x%X", value); + + WriteReg(0x61, 0x05); // set Main battery precharge current to 125mA + WriteReg(0x62, 0x10); // set Main battery charger current to 900mA ( 0x08-200mA, 0x09-300mA, 0x0A-400mA ) + WriteReg(0x63, 0x15); // set Main battery term charge current to 125mA + value = ReadReg(0x62); + ESP_LOGI(TAG, "axp2101 read 0x62 get: 0x%X", value); + + value = ReadReg(0x18); + ESP_LOGI(TAG, "axp2101 read 0x18 get: 0x%X", value); + value = value & 0b11100000; + value = value | 0b00001110; + WriteReg(0x18, value); + value = ReadReg(0x18); + ESP_LOGI(TAG, "axp2101 read 0x18 get: 0x%X", value); + + WriteReg(0x14, 0x00); // set minimum system voltage to 4.1V (default 4.7V), for poor USB cables + WriteReg(0x15, 0x00); // set input voltage limit to 3.88v, for poor USB cables + WriteReg(0x16, 0x05); // set input voltage limit to 3.88v, for poor USB cables + + WriteReg(0x24, 0x01); // set Vsys for PWROFF threshold to 3.2V (default - 2.6V and kill battery) + WriteReg(0x50, 0x14); // set TS pin to EXTERNAL input (not temperature) + return true; +} + +void Axp2101::WriteReg(uint8_t reg, uint8_t value) { + uint8_t buffer[2]; + buffer[0] = reg; + buffer[1] = value; + ESP_ERROR_CHECK(i2c_master_transmit(i2c_device_, buffer, 2, 100)); +} + +uint8_t Axp2101::ReadReg(uint8_t reg) { + uint8_t buffer[1]; + ESP_ERROR_CHECK(i2c_master_transmit_receive(i2c_device_, ®, 1, buffer, 1, 100)); + return buffer[0]; +} + +bool Axp2101::IsCharging() { + uint8_t value = ReadReg(0x01); + return (value & 0b01100000) == 0b00100000; +} + +bool Axp2101::IsChargingDone() { + uint8_t value = ReadReg(0x01); + return (value & 0b00000111) == 0b00000100; +} + +int Axp2101::GetBatteryLevel() { + uint8_t value = ReadReg(0xA4); + return value; +} + +void Axp2101::PowerOff() { + uint8_t value = ReadReg(0x10); + value = value | 0x01; + WriteReg(0x10, value); +} diff --git a/main/boards/kevin-box-2/axp2101.h b/main/boards/kevin-box-2/axp2101.h new file mode 100644 index 00000000..45f744e8 --- /dev/null +++ b/main/boards/kevin-box-2/axp2101.h @@ -0,0 +1,22 @@ +#ifndef __AXP2101_H__ +#define __AXP2101_H__ + +#include + +class Axp2101 { +public: + Axp2101() = default; + bool Initialize(i2c_master_bus_handle_t i2c_bus, int i2c_device_address); + bool IsCharging(); + bool IsChargingDone(); + int GetBatteryLevel(); + void PowerOff(); + +private: + i2c_master_dev_handle_t i2c_device_ = nullptr; + + void WriteReg(uint8_t reg, uint8_t value); + uint8_t ReadReg(uint8_t reg); +}; + +#endif diff --git a/main/boards/kevin-box-2/config.h b/main/boards/kevin-box-2/config.h new file mode 100644 index 00000000..a2729001 --- /dev/null +++ b/main/boards/kevin-box-2/config.h @@ -0,0 +1,41 @@ +#ifndef _BOARD_CONFIG_H_ +#define _BOARD_CONFIG_H_ + +#include + +#define AUDIO_INPUT_SAMPLE_RATE 24000 +#define AUDIO_OUTPUT_SAMPLE_RATE 24000 + +#define AUDIO_INPUT_REFERENCE true + +#define AUDIO_I2S_GPIO_MCLK GPIO_NUM_40 +#define AUDIO_I2S_GPIO_WS GPIO_NUM_47 +#define AUDIO_I2S_GPIO_BCLK GPIO_NUM_38 +#define AUDIO_I2S_GPIO_DIN GPIO_NUM_39 +#define AUDIO_I2S_GPIO_DOUT GPIO_NUM_48 + +#define AUDIO_CODEC_PA_PIN GPIO_NUM_9 +#define AUDIO_CODEC_I2C_SDA_PIN GPIO_NUM_42 +#define AUDIO_CODEC_I2C_SCL_PIN GPIO_NUM_41 +#define AUDIO_CODEC_ES8311_ADDR ES8311_CODEC_DEFAULT_ADDR +#define AUDIO_CODEC_ES7210_ADDR ES7210_CODEC_DEFAULT_ADDR + +#define BUILTIN_LED_GPIO GPIO_NUM_3 +#define BOOT_BUTTON_GPIO GPIO_NUM_0 +#define VOLUME_UP_BUTTON_GPIO GPIO_NUM_1 +#define VOLUME_DOWN_BUTTON_GPIO GPIO_NUM_2 + +#define DISPLAY_SDA_PIN GPIO_NUM_7 +#define DISPLAY_SCL_PIN GPIO_NUM_8 +#define DISPLAY_WIDTH 128 +#define DISPLAY_HEIGHT 64 +#define DISPLAY_MIRROR_X false +#define DISPLAY_MIRROR_Y false + +#define ML307_RX_PIN GPIO_NUM_5 +#define ML307_TX_PIN GPIO_NUM_6 + +#define AXP2101_I2C_ADDR 0x34 + + +#endif // _BOARD_CONFIG_H_ diff --git a/main/boards/kevin-box-2/kevin_box_board.cc b/main/boards/kevin-box-2/kevin_box_board.cc new file mode 100644 index 00000000..3bbbb376 --- /dev/null +++ b/main/boards/kevin-box-2/kevin_box_board.cc @@ -0,0 +1,168 @@ +#include "boards/ml307_board.h" +#include "audio_codecs/box_audio_codec.h" +#include "display/ssd1306_display.h" +#include "application.h" +#include "button.h" +#include "led.h" +#include "config.h" +#include "axp2101.h" + +#include +#include +#include +#include + +static const char *TAG = "KevinBoxBoard"; + +class KevinBoxBoard : public Ml307Board { +private: + i2c_master_bus_handle_t display_i2c_bus_; + i2c_master_bus_handle_t codec_i2c_bus_; + Axp2101 axp2101_; + Button boot_button_; + Button volume_up_button_; + Button volume_down_button_; + uint8_t _data_buffer[2]; + + void MountStorage() { + // Mount the storage partition + esp_vfs_spiffs_conf_t conf = { + .base_path = "/storage", + .partition_label = "storage", + .max_files = 5, + .format_if_mount_failed = true, + }; + esp_vfs_spiffs_register(&conf); + } + + void Enable4GModule() { + // Make GPIO HIGH to enable the 4G module + gpio_config_t ml307_enable_config = { + .pin_bit_mask = (1ULL << 4), + .mode = GPIO_MODE_OUTPUT, + .pull_up_en = GPIO_PULLUP_DISABLE, + .pull_down_en = GPIO_PULLDOWN_DISABLE, + .intr_type = GPIO_INTR_DISABLE, + }; + gpio_config(&ml307_enable_config); + gpio_set_level(GPIO_NUM_4, 1); + } + + void InitializeDisplayI2c() { + i2c_master_bus_config_t bus_config = { + .i2c_port = I2C_NUM_0, + .sda_io_num = DISPLAY_SDA_PIN, + .scl_io_num = DISPLAY_SCL_PIN, + .clk_source = I2C_CLK_SRC_DEFAULT, + .glitch_ignore_cnt = 7, + .intr_priority = 0, + .trans_queue_depth = 0, + .flags = { + .enable_internal_pullup = 1, + }, + }; + ESP_ERROR_CHECK(i2c_new_master_bus(&bus_config, &display_i2c_bus_)); + } + + void InitializeCodecI2c() { + // Initialize I2C peripheral + i2c_master_bus_config_t i2c_bus_cfg = { + .i2c_port = I2C_NUM_1, + .sda_io_num = AUDIO_CODEC_I2C_SDA_PIN, + .scl_io_num = AUDIO_CODEC_I2C_SCL_PIN, + .clk_source = I2C_CLK_SRC_DEFAULT, + .glitch_ignore_cnt = 7, + .intr_priority = 0, + .trans_queue_depth = 0, + .flags = { + .enable_internal_pullup = 1, + }, + }; + ESP_ERROR_CHECK(i2c_new_master_bus(&i2c_bus_cfg, &codec_i2c_bus_)); + } + + void InitializeButtons() { + boot_button_.OnClick([this]() { + Application::GetInstance().ToggleChatState(); + }); + + volume_up_button_.OnClick([this]() { + auto codec = GetAudioCodec(); + auto volume = codec->output_volume() + 10; + if (volume > 100) { + volume = 100; + } + codec->SetOutputVolume(volume); + GetDisplay()->ShowNotification("Volume\n" + std::to_string(volume)); + }); + + volume_up_button_.OnLongPress([this]() { + auto codec = GetAudioCodec(); + codec->SetOutputVolume(100); + GetDisplay()->ShowNotification("Volume\n100"); + }); + + volume_down_button_.OnClick([this]() { + auto codec = GetAudioCodec(); + auto volume = codec->output_volume() - 10; + if (volume < 0) { + volume = 0; + } + codec->SetOutputVolume(volume); + GetDisplay()->ShowNotification("Volume\n" + std::to_string(volume)); + }); + + volume_down_button_.OnLongPress([this]() { + auto codec = GetAudioCodec(); + codec->SetOutputVolume(0); + GetDisplay()->ShowNotification("Volume\n0"); + }); + } + +public: + KevinBoxBoard() : Ml307Board(ML307_TX_PIN, ML307_RX_PIN, 4096), + boot_button_(BOOT_BUTTON_GPIO), + volume_up_button_(VOLUME_UP_BUTTON_GPIO), + volume_down_button_(VOLUME_DOWN_BUTTON_GPIO) { + } + + virtual void Initialize() override { + ESP_LOGI(TAG, "Initializing KevinBoxBoard"); + InitializeDisplayI2c(); + InitializeCodecI2c(); + axp2101_.Initialize(codec_i2c_bus_, AXP2101_I2C_ADDR); + + MountStorage(); + Enable4GModule(); + + InitializeButtons(); + + Ml307Board::Initialize(); + } + + virtual Led* GetBuiltinLed() override { + static Led led(BUILTIN_LED_GPIO); + return &led; + } + + virtual AudioCodec* GetAudioCodec() override { + static BoxAudioCodec audio_codec(codec_i2c_bus_, AUDIO_INPUT_SAMPLE_RATE, AUDIO_OUTPUT_SAMPLE_RATE, + AUDIO_I2S_GPIO_MCLK, AUDIO_I2S_GPIO_BCLK, AUDIO_I2S_GPIO_WS, AUDIO_I2S_GPIO_DOUT, AUDIO_I2S_GPIO_DIN, + AUDIO_CODEC_PA_PIN, AUDIO_CODEC_ES8311_ADDR, AUDIO_CODEC_ES7210_ADDR, AUDIO_INPUT_REFERENCE); + return &audio_codec; + } + + virtual Display* GetDisplay() override { + static Ssd1306Display display(display_i2c_bus_, DISPLAY_WIDTH, DISPLAY_HEIGHT, DISPLAY_MIRROR_X, DISPLAY_MIRROR_Y); + return &display; + } + + virtual bool GetBatteryLevel(int &level, bool& charging) override { + level = axp2101_.GetBatteryLevel(); + charging = axp2101_.IsCharging(); + ESP_LOGI(TAG, "Battery level: %d, Charging: %d", level, charging); + return true; + } +}; + +DECLARE_BOARD(KevinBoxBoard); \ No newline at end of file diff --git a/main/boards/ml307_board.cc b/main/boards/ml307_board.cc index cd18bc5c..520c75e8 100644 --- a/main/boards/ml307_board.cc +++ b/main/boards/ml307_board.cc @@ -6,6 +6,8 @@ #include #include #include +#include +#include static const char *TAG = "Ml307Board"; @@ -83,6 +85,14 @@ WebSocket* Ml307Board::CreateWebSocket() { return new WebSocket(new Ml307SslTransport(modem_, 0)); } +Mqtt* Ml307Board::CreateMqtt() { + return new Ml307Mqtt(modem_, 0); +} + +Udp* Ml307Board::CreateUdp() { + return new Ml307Udp(modem_, 0); +} + bool Ml307Board::GetNetworkState(std::string& network_name, int& signal_quality, std::string& signal_quality_text) { if (!modem_.network_ready()) { return false; @@ -96,16 +106,11 @@ bool Ml307Board::GetNetworkState(std::string& network_name, int& signal_quality, std::string Ml307Board::GetBoardJson() { // Set the board type for OTA std::string board_type = BOARD_TYPE; - std::string module_name = modem_.GetModuleName(); - std::string carrier_name = modem_.GetCarrierName(); - std::string imei = modem_.GetImei(); - std::string iccid = modem_.GetIccid(); - int csq = modem_.GetCsq(); std::string board_json = std::string("{\"type\":\"" + board_type + "\","); - board_json += "\"revision\":\"" + module_name + "\","; - board_json += "\"carrier\":\"" + carrier_name + "\","; - board_json += "\"csq\":\"" + std::to_string(csq) + "\","; - board_json += "\"imei\":\"" + imei + "\","; - board_json += "\"iccid\":\"" + iccid + "\"}"; + board_json += "\"revision\":\"" + modem_.GetModuleName() + "\","; + board_json += "\"carrier\":\"" + modem_.GetCarrierName() + "\","; + board_json += "\"csq\":\"" + std::to_string(modem_.GetCsq()) + "\","; + board_json += "\"imei\":\"" + modem_.GetImei() + "\","; + board_json += "\"iccid\":\"" + modem_.GetIccid() + "\"}"; return board_json; } diff --git a/main/boards/ml307_board.h b/main/boards/ml307_board.h index 32d97b93..9c04812f 100644 --- a/main/boards/ml307_board.h +++ b/main/boards/ml307_board.h @@ -17,6 +17,8 @@ public: virtual void StartNetwork() override; virtual Http* CreateHttp() override; virtual WebSocket* CreateWebSocket() override; + virtual Mqtt* CreateMqtt() override; + virtual Udp* CreateUdp() override; virtual bool GetNetworkState(std::string& network_name, int& signal_quality, std::string& signal_quality_text) override; }; diff --git a/main/boards/wifi_board.cc b/main/boards/wifi_board.cc index b26903ea..0ab651dd 100644 --- a/main/boards/wifi_board.cc +++ b/main/boards/wifi_board.cc @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include #include @@ -39,12 +41,15 @@ void WifiBoard::StartNetwork() { display->SetText(std::string("Connect to WiFi\n") + wifi_station.GetSsid()); wifi_station.Start(); if (!wifi_station.IsConnected()) { - application.Alert("Info", "Configuring WiFi"); builtin_led->SetBlue(); builtin_led->Blink(1000, 500); auto& wifi_ap = WifiConfigurationAp::GetInstance(); wifi_ap.SetSsidPrefix("Xiaozhi"); wifi_ap.Start(); + // 播报配置 WiFi 的提示 + application.Alert("Info", "Configuring WiFi"); + // 显示 WiFi 配置 AP 的 SSID 和 Web 服务器 URL + display->SetText(wifi_ap.GetSsid() + "\n" + wifi_ap.GetWebServerUrl()); // Wait forever until reset after configuration while (true) { vTaskDelay(pdMS_TO_TICKS(1000)); @@ -69,6 +74,14 @@ WebSocket* WifiBoard::CreateWebSocket() { } } +Mqtt* WifiBoard::CreateMqtt() { + return new EspMqtt(); +} + +Udp* WifiBoard::CreateUdp() { + return new EspUdp(); +} + bool WifiBoard::GetNetworkState(std::string& network_name, int& signal_quality, std::string& signal_quality_text) { if (wifi_config_mode_) { auto& wifi_ap = WifiConfigurationAp::GetInstance(); diff --git a/main/boards/wifi_board.h b/main/boards/wifi_board.h index 88145fdd..9675082e 100644 --- a/main/boards/wifi_board.h +++ b/main/boards/wifi_board.h @@ -14,6 +14,8 @@ public: virtual void StartNetwork() override; virtual Http* CreateHttp() override; virtual WebSocket* CreateWebSocket() override; + virtual Mqtt* CreateMqtt() override; + virtual Udp* CreateUdp() override; virtual bool GetNetworkState(std::string& network_name, int& signal_quality, std::string& signal_quality_text) override; }; diff --git a/main/display.cc b/main/display.cc index 93a9a1da..fd2cf298 100644 --- a/main/display.cc +++ b/main/display.cc @@ -123,10 +123,10 @@ void Display::UpdateDisplay() { } } - int battery_voltage; + int battery_level; bool charging; - if (board.GetBatteryVoltage(battery_voltage, charging)) { - text += "\n" + std::to_string(battery_voltage) + "mV"; + if (board.GetBatteryLevel(battery_level, charging)) { + text += "\nPower " + std::to_string(battery_level) + "%"; if (charging) { text += " (Charging)"; } diff --git a/main/idf_component.yml b/main/idf_component.yml index 8563a68c..bcba1495 100644 --- a/main/idf_component.yml +++ b/main/idf_component.yml @@ -2,7 +2,7 @@ dependencies: 78/esp-wifi-connect: "~1.3.0" 78/esp-opus-encoder: "~1.1.0" - 78/esp-ml307: "~1.4.0" + 78/esp-ml307: "~1.6.0" espressif/led_strip: "^2.4.1" espressif/esp_codec_dev: "^1.3.1" espressif/esp-sr: "^1.9.0" diff --git a/main/ota.cc b/main/ota.cc index ef9ed78f..a4686eb5 100644 --- a/main/ota.cc +++ b/main/ota.cc @@ -69,6 +69,19 @@ void Ota::CheckVersion() { ESP_LOGE(TAG, "Failed to parse JSON response"); return; } + + cJSON *mqtt = cJSON_GetObjectItem(root, "mqtt"); + if (mqtt != NULL) { + cJSON *item = NULL; + cJSON_ArrayForEach(item, mqtt) { + if (item->type == cJSON_String) { + mqtt_config_[item->string] = item->valuestring; + ESP_LOGI(TAG, "MQTT config: %s = %s", item->string, item->valuestring); + } + } + has_mqtt_config_ = true; + } + cJSON *firmware = cJSON_GetObjectItem(root, "firmware"); if (firmware == NULL) { ESP_LOGE(TAG, "Failed to get firmware object"); @@ -148,12 +161,12 @@ void Ota::Upgrade(const std::string& firmware_url) { return; } - char buffer[4096]; + std::vector buffer(4096); size_t total_read = 0, recent_read = 0; auto last_calc_time = esp_timer_get_time(); while (true) { taskYIELD(); // Avoid watchdog timeout - int ret = http->Read(buffer, sizeof(buffer)); + int ret = http->Read(buffer.data(), buffer.size()); if (ret < 0) { ESP_LOGE(TAG, "Failed to read HTTP data: %s", esp_err_to_name(ret)); delete http; @@ -179,7 +192,7 @@ void Ota::Upgrade(const std::string& firmware_url) { if (!image_header_checked) { - image_header.append(buffer, ret); + image_header.append(buffer.data(), ret); if (image_header.size() >= sizeof(esp_image_header_t) + sizeof(esp_image_segment_header_t) + sizeof(esp_app_desc_t)) { esp_app_desc_t new_app_info; memcpy(&new_app_info, image_header.data() + sizeof(esp_image_header_t) + sizeof(esp_image_segment_header_t), sizeof(esp_app_desc_t)); @@ -202,7 +215,7 @@ void Ota::Upgrade(const std::string& firmware_url) { image_header_checked = true; } } - auto err = esp_ota_write(update_handle, buffer, ret); + auto err = esp_ota_write(update_handle, buffer.data(), ret); if (err != ESP_OK) { ESP_LOGE(TAG, "Failed to write OTA data: %s", esp_err_to_name(err)); esp_ota_abort(update_handle); diff --git a/main/ota.h b/main/ota.h index 157ef179..a27cd277 100644 --- a/main/ota.h +++ b/main/ota.h @@ -15,16 +15,21 @@ public: void SetPostData(const std::string& post_data); void CheckVersion(); bool HasNewVersion() { return has_new_version_; } + bool HasMqttConfig() { return has_mqtt_config_; } void StartUpgrade(std::function callback); void MarkCurrentVersionValid(); + std::map& GetMqttConfig() { return mqtt_config_; } + private: std::string check_version_url_; bool has_new_version_ = false; + bool has_mqtt_config_ = false; std::string firmware_version_; std::string firmware_url_; std::string post_data_; std::map headers_; + std::map mqtt_config_; void Upgrade(const std::string& firmware_url); std::function upgrade_callback_; diff --git a/main/protocol.cc b/main/protocol.cc new file mode 100644 index 00000000..e69de29b diff --git a/main/protocol.h b/main/protocol.h new file mode 100644 index 00000000..c64c925a --- /dev/null +++ b/main/protocol.h @@ -0,0 +1,23 @@ +#ifndef PROTOCOL_H +#define PROTOCOL_H + +#include +#include +#include + +class Protocol { +public: + virtual ~Protocol() = default; + + virtual void OnIncomingAudio(std::function callback) = 0; + virtual void OnIncomingJson(std::function callback) = 0; + virtual void SendAudio(const std::string& data) = 0; + virtual void SendText(const std::string& text) = 0; + virtual bool OpenAudioChannel() = 0; + virtual void CloseAudioChannel() = 0; + virtual void OnAudioChannelOpened(std::function callback) = 0; + virtual void OnAudioChannelClosed(std::function callback) = 0; +}; + +#endif // PROTOCOL_H + diff --git a/main/protocols/mqtt_protocol.cc b/main/protocols/mqtt_protocol.cc new file mode 100644 index 00000000..2a41c932 --- /dev/null +++ b/main/protocols/mqtt_protocol.cc @@ -0,0 +1,281 @@ +#include "mqtt_protocol.h" +#include "board.h" + +#include +#include +#include +#include +#include + +#define TAG "MQTT" + +MqttProtocol::MqttProtocol(std::map& config) { + event_group_handle_ = xEventGroupCreate(); + + endpoint_ = config["endpoint"]; + client_id_ = config["client_id"]; + username_ = config["username"]; + password_ = config["password"]; + subscribe_topic_ = config["subscribe_topic"]; + publish_topic_ = config["publish_topic"]; + + StartMqttClient(); +} + +MqttProtocol::~MqttProtocol() { + ESP_LOGI(TAG, "MqttProtocol deinit"); + if (udp_ != nullptr) { + delete udp_; + } + if (mqtt_ != nullptr) { + delete mqtt_; + } + vEventGroupDelete(event_group_handle_); +} + +bool MqttProtocol::StartMqttClient() { + if (mqtt_ != nullptr) { + ESP_LOGW(TAG, "Mqtt client already started"); + delete mqtt_; + } + + mqtt_ = Board::GetInstance().CreateMqtt(); + mqtt_->SetKeepAlive(90); + + mqtt_->OnDisconnected([this]() { + ESP_LOGI(TAG, "Disconnected from endpoint"); + }); + + mqtt_->OnMessage([this](const std::string& topic, const std::string& payload) { + cJSON* root = cJSON_Parse(payload.c_str()); + if (root == nullptr) { + ESP_LOGE(TAG, "Failed to parse json message %s", payload.c_str()); + return; + } + cJSON* type = cJSON_GetObjectItem(root, "type"); + if (type == nullptr) { + ESP_LOGE(TAG, "Message type is not specified"); + cJSON_Delete(root); + return; + } + if (on_incoming_json_ != nullptr) { + on_incoming_json_(root); + } + if (strcmp(type->valuestring, "hello") == 0) { + ParseServerHello(root); + } else if (strcmp(type->valuestring, "goodbye") == 0) { + auto session_id = cJSON_GetObjectItem(root, "session_id"); + if (session_id == nullptr || session_id_ == session_id->valuestring) { + if (on_audio_channel_closed_ != nullptr) { + on_audio_channel_closed_(); + } + } + } + cJSON_Delete(root); + }); + + ESP_LOGI(TAG, "Connecting to endpoint %s", endpoint_.c_str()); + if (!mqtt_->Connect(endpoint_, 8883, client_id_, username_, password_)) { + ESP_LOGE(TAG, "Failed to connect to endpoint"); + return false; + } + + ESP_LOGI(TAG, "Connected to endpoint"); + if (!subscribe_topic_.empty()) { + mqtt_->Subscribe(subscribe_topic_, 2); + } + return true; +} + +void MqttProtocol::SendText(const std::string& text) { + if (publish_topic_.empty()) { + ESP_LOGE(TAG, "Publish topic is not specified"); + return; + } + mqtt_->Publish(publish_topic_, text); +} + +void MqttProtocol::SendAudio(const std::string& data) { + std::string nonce(aes_nonce_); + *(uint16_t*)&nonce[2] = htons(data.size()); + *(uint32_t*)&nonce[12] = htonl(++local_sequence_); + + std::string encrypted; + encrypted.resize(aes_nonce_.size() + data.size()); + memcpy(encrypted.data(), nonce.data(), nonce.size()); + + size_t nc_off = 0; + uint8_t stream_block[16] = {0}; + if (mbedtls_aes_crypt_ctr(&aes_ctx_, data.size(), &nc_off, (uint8_t*)nonce.c_str(), stream_block, + (uint8_t*)data.data(), (uint8_t*)&encrypted[nonce.size()]) != 0) { + ESP_LOGE(TAG, "Failed to encrypt audio data"); + return; + } + + std::lock_guard lock(channel_mutex_); + if (udp_ == nullptr) { + return; + } + udp_->Send(encrypted); +} + +void MqttProtocol::CloseAudioChannel() { + { + std::lock_guard lock(channel_mutex_); + if (udp_ != nullptr) { + delete udp_; + udp_ = nullptr; + } + } + + std::string message = "{"; + message += "\"type\":\"goodbye\""; + message += "}"; + SendText(message); + + if (on_audio_channel_closed_ != nullptr) { + on_audio_channel_closed_(); + } +} + +bool MqttProtocol::OpenAudioChannel() { + if (!mqtt_->IsConnected()) { + ESP_LOGE(TAG, "MQTT is not connected, try to connect now"); + if (!StartMqttClient()) { + ESP_LOGE(TAG, "Failed to connect to MQTT"); + return false; + } + } + + session_id_ = ""; + + // 发送 hello 消息申请 UDP 通道 + std::string message = "{"; + message += "\"type\":\"hello\","; + message += "\"version\": 3,"; + message += "\"transport\":\"udp\","; + message += "\"audio_params\":{"; + message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":60"; + message += "}}"; + SendText(message); + + // 等待服务器响应 + EventBits_t bits = xEventGroupWaitBits(event_group_handle_, MQTT_PROTOCOL_SERVER_HELLO_EVENT, pdTRUE, pdFALSE, pdMS_TO_TICKS(10000)); + if (!(bits & MQTT_PROTOCOL_SERVER_HELLO_EVENT)) { + ESP_LOGE(TAG, "Failed to receive server hello"); + return false; + } + + std::lock_guard lock(channel_mutex_); + if (udp_ != nullptr) { + delete udp_; + } + udp_ = Board::GetInstance().CreateUdp(); + udp_->OnMessage([this](const std::string& data) { + if (data.size() < sizeof(aes_nonce_)) { + ESP_LOGE(TAG, "Invalid audio packet size: %zu", data.size()); + return; + } + if (data[0] != 0x01) { + ESP_LOGE(TAG, "Invalid audio packet type: %x", data[0]); + return; + } + uint32_t sequence = ntohl(*(uint32_t*)&data[12]); + if (sequence < remote_sequence_) { + ESP_LOGW(TAG, "Received audio packet with old sequence: %lu, expected: %lu", sequence, remote_sequence_); + return; + } + + std::string decrypted; + size_t decrypted_size = data.size() - aes_nonce_.size(); + size_t nc_off = 0; + uint8_t stream_block[16] = {0}; + decrypted.resize(decrypted_size); + auto nonce = (uint8_t*)data.data(); + auto encrypted = (uint8_t*)data.data() + aes_nonce_.size(); + int ret = mbedtls_aes_crypt_ctr(&aes_ctx_, decrypted_size, &nc_off, nonce, stream_block, encrypted, (uint8_t*)decrypted.data()); + if (ret != 0) { + ESP_LOGE(TAG, "Failed to decrypt audio data, ret: %d", ret); + return; + } + if (on_incoming_audio_ != nullptr) { + on_incoming_audio_(decrypted); + } + remote_sequence_ = sequence; + }); + + udp_->Connect(udp_server_, udp_port_); + + if (on_audio_channel_opened_ != nullptr) { + on_audio_channel_opened_(); + } + return true; +} + +void MqttProtocol::OnIncomingJson(std::function callback) { + on_incoming_json_ = callback; +} + +void MqttProtocol::OnIncomingAudio(std::function callback) { + on_incoming_audio_ = callback; +} + +void MqttProtocol::OnAudioChannelOpened(std::function callback) { + on_audio_channel_opened_ = callback; +} + +void MqttProtocol::OnAudioChannelClosed(std::function callback) { + on_audio_channel_closed_ = callback; +} + +void MqttProtocol::ParseServerHello(const cJSON* root) { + auto transport = cJSON_GetObjectItem(root, "transport"); + if (transport == nullptr || strcmp(transport->valuestring, "udp") != 0) { + ESP_LOGE(TAG, "Unsupported transport: %s", transport->valuestring); + return; + } + + auto session_id = cJSON_GetObjectItem(root, "session_id"); + if (session_id != nullptr) { + session_id_ = session_id->valuestring; + } + + auto udp = cJSON_GetObjectItem(root, "udp"); + if (udp == nullptr) { + ESP_LOGE(TAG, "UDP is not specified"); + return; + } + udp_server_ = cJSON_GetObjectItem(udp, "server")->valuestring; + udp_port_ = cJSON_GetObjectItem(udp, "port")->valueint; + auto key = cJSON_GetObjectItem(udp, "key")->valuestring; + auto nonce = cJSON_GetObjectItem(udp, "nonce")->valuestring; + + // auto encryption = cJSON_GetObjectItem(udp, "encryption")->valuestring; + // ESP_LOGI(TAG, "UDP server: %s, port: %d, encryption: %s", udp_server_.c_str(), udp_port_, encryption); + aes_nonce_ = DecodeHexString(nonce); + mbedtls_aes_init(&aes_ctx_); + mbedtls_aes_setkey_enc(&aes_ctx_, (const unsigned char*)DecodeHexString(key).c_str(), 128); + local_sequence_ = 0; + remote_sequence_ = 0; + xEventGroupSetBits(event_group_handle_, MQTT_PROTOCOL_SERVER_HELLO_EVENT); +} + + +static const char hex_chars[] = "0123456789ABCDEF"; +// 辅助函数,将单个十六进制字符转换为对应的数值 +static inline uint8_t CharToHex(char c) { + if (c >= '0' && c <= '9') return c - '0'; + if (c >= 'A' && c <= 'F') return c - 'A' + 10; + if (c >= 'a' && c <= 'f') return c - 'a' + 10; + return 0; // 对于无效输入,返回0 +} + +std::string MqttProtocol::DecodeHexString(const std::string& hex_string) { + std::string decoded; + decoded.reserve(hex_string.size() / 2); + for (size_t i = 0; i < hex_string.size(); i += 2) { + char byte = (CharToHex(hex_string[i]) << 4) | CharToHex(hex_string[i + 1]); + decoded.push_back(byte); + } + return decoded; +} diff --git a/main/protocols/mqtt_protocol.h b/main/protocols/mqtt_protocol.h new file mode 100644 index 00000000..a57cdc5b --- /dev/null +++ b/main/protocols/mqtt_protocol.h @@ -0,0 +1,68 @@ +#ifndef MQTT_PROTOCOL_H +#define MQTT_PROTOCOL_H + + +#include "protocol.h" +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#define MQTT_PING_INTERVAL_SECONDS 90 +#define MQTT_RECONNECT_INTERVAL_MS 10000 + +#define MQTT_PROTOCOL_SERVER_HELLO_EVENT (1 << 0) + +class MqttProtocol : public Protocol { +public: + MqttProtocol(std::map& config); + ~MqttProtocol(); + + void OnIncomingAudio(std::function callback); + void OnIncomingJson(std::function callback); + void SendAudio(const std::string& data); + void SendText(const std::string& text); + bool OpenAudioChannel(); + void CloseAudioChannel(); + void OnAudioChannelOpened(std::function callback); + void OnAudioChannelClosed(std::function callback); + +private: + EventGroupHandle_t event_group_handle_; + + std::function on_incoming_json_; + std::function on_incoming_audio_; + std::function on_audio_channel_opened_; + std::function on_audio_channel_closed_; + + std::string endpoint_; + std::string client_id_; + std::string username_; + std::string password_; + std::string subscribe_topic_; + std::string publish_topic_; + + std::mutex channel_mutex_; + Mqtt* mqtt_ = nullptr; + Udp* udp_ = nullptr; + mbedtls_aes_context aes_ctx_; + std::string aes_nonce_; + std::string udp_server_; + int udp_port_; + uint32_t local_sequence_; + uint32_t remote_sequence_; + std::string session_id_; + + bool StartMqttClient(); + void ParseServerHello(const cJSON* root); + std::string DecodeHexString(const std::string& hex_string); +}; + + +#endif // MQTT_PROTOCOL_H diff --git a/main/wake_word_detect.cc b/main/wake_word_detect.cc index 2d73636a..261777d8 100644 --- a/main/wake_word_detect.cc +++ b/main/wake_word_detect.cc @@ -170,6 +170,7 @@ void WakeWordDetect::StoreWakeWordData(uint16_t* data, size_t samples) { } void WakeWordDetect::EncodeWakeWordData() { + wake_word_opus_.clear(); if (wake_word_encode_task_stack_ == nullptr) { wake_word_encode_task_stack_ = (StackType_t*)malloc(4096 * 8); } @@ -180,34 +181,34 @@ void WakeWordDetect::EncodeWakeWordData() { OpusEncoder* encoder = new OpusEncoder(); encoder->Configure(16000, 1, 60); encoder->SetComplexity(0); - this_->wake_word_opus_.resize(4096 * 4); - size_t offset = 0; for (auto& pcm: this_->wake_word_pcm_) { - encoder->Encode(pcm, [this_, &offset](const uint8_t* opus, size_t opus_size) { - size_t protocol_size = sizeof(BinaryProtocol3) + opus_size; - if (offset + protocol_size < this_->wake_word_opus_.size()) { - auto protocol = (BinaryProtocol3*)(&this_->wake_word_opus_[offset]); - protocol->type = 0; - protocol->reserved = 0; - protocol->payload_size = htons(opus_size); - memcpy(protocol->payload, opus, opus_size); - offset += protocol_size; - } + encoder->Encode(pcm, [this_](const uint8_t* opus, size_t opus_size) { + std::lock_guard lock(this_->wake_word_mutex_); + this_->wake_word_opus_.emplace_back(std::string(reinterpret_cast(opus), opus_size)); + this_->wake_word_cv_.notify_one(); }); } this_->wake_word_pcm_.clear(); - this_->wake_word_opus_.resize(offset); auto end_time = esp_timer_get_time(); ESP_LOGI(TAG, "Encode wake word opus: %zu bytes in %lld ms", this_->wake_word_opus_.size(), (end_time - start_time) / 1000); xEventGroupSetBits(this_->event_group_, WAKE_WORD_ENCODED_EVENT); + this_->wake_word_cv_.notify_one(); delete encoder; vTaskDelete(NULL); }, "encode_detect_packets", 4096 * 8, this, 1, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_); } -const std::string&& WakeWordDetect::GetWakeWordStream() { - xEventGroupWaitBits(event_group_, WAKE_WORD_ENCODED_EVENT, pdTRUE, pdTRUE, portMAX_DELAY); - return std::move(wake_word_opus_); +bool WakeWordDetect::GetWakeWordOpus(std::string& opus) { + std::unique_lock lock(wake_word_mutex_); + wake_word_cv_.wait(lock, [this]() { + return !wake_word_opus_.empty() || (xEventGroupGetBits(event_group_) & WAKE_WORD_ENCODED_EVENT); + }); + if (wake_word_opus_.empty()) { + return false; + } + opus.swap(wake_word_opus_.front()); + wake_word_opus_.pop_front(); + return true; } diff --git a/main/wake_word_detect.h b/main/wake_word_detect.h index 53cbb481..f24ba18a 100644 --- a/main/wake_word_detect.h +++ b/main/wake_word_detect.h @@ -12,6 +12,8 @@ #include #include #include +#include +#include class WakeWordDetect { @@ -27,7 +29,7 @@ public: void StopDetection(); bool IsDetectionRunning(); void EncodeWakeWordData(); - const std::string&& GetWakeWordStream(); + bool GetWakeWordOpus(std::string& opus); private: esp_afe_sr_data_t* afe_detection_data_ = nullptr; @@ -48,7 +50,9 @@ private: StaticTask_t wake_word_encode_task_buffer_; StackType_t* wake_word_encode_task_stack_ = nullptr; std::list> wake_word_pcm_; - std::string wake_word_opus_; + std::list wake_word_opus_; + std::mutex wake_word_mutex_; + std::condition_variable wake_word_cv_; void StoreWakeWordData(uint16_t* data, size_t size); void AudioDetectionTask(); diff --git a/sdkconfig.defaults b/sdkconfig.defaults index d4889182..23fe8055 100644 --- a/sdkconfig.defaults +++ b/sdkconfig.defaults @@ -5,6 +5,7 @@ CONFIG_BOOTLOADER_APP_ROLLBACK_ENABLE=y CONFIG_HTTPD_MAX_REQ_HDR_LEN=2048 CONFIG_HTTPD_MAX_URI_LEN=2048 +CONFIG_ESP_MAIN_TASK_STACK_SIZE=8192 CONFIG_PARTITION_TABLE_CUSTOM=y CONFIG_PARTITION_TABLE_CUSTOM_FILENAME="partitions.csv"