forked from xiaozhi/xiaozhi-esp32
v1.8.0: Audio 代码重构与低功耗优化 (#943)
* Reconstruct Audio Code
* Remove old IoT implementation
* Add MQTT-UDP documentation
* OTA升级失败时,可以继续使用
This commit is contained in:
@@ -40,7 +40,7 @@ bool MqttProtocol::StartMqttClient(bool report_error) {
|
||||
auto client_id = settings.GetString("client_id");
|
||||
auto username = settings.GetString("username");
|
||||
auto password = settings.GetString("password");
|
||||
int keepalive_interval = settings.GetInt("keepalive", 120);
|
||||
int keepalive_interval = settings.GetInt("keepalive", 240);
|
||||
publish_topic_ = settings.GetString("publish_topic");
|
||||
|
||||
if (endpoint.empty()) {
|
||||
@@ -121,25 +121,25 @@ bool MqttProtocol::SendText(const std::string& text) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool MqttProtocol::SendAudio(const AudioStreamPacket& packet) {
|
||||
bool MqttProtocol::SendAudio(std::unique_ptr<AudioStreamPacket> packet) {
|
||||
std::lock_guard<std::mutex> lock(channel_mutex_);
|
||||
if (udp_ == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string nonce(aes_nonce_);
|
||||
*(uint16_t*)&nonce[2] = htons(packet.payload.size());
|
||||
*(uint32_t*)&nonce[8] = htonl(packet.timestamp);
|
||||
*(uint16_t*)&nonce[2] = htons(packet->payload.size());
|
||||
*(uint32_t*)&nonce[8] = htonl(packet->timestamp);
|
||||
*(uint32_t*)&nonce[12] = htonl(++local_sequence_);
|
||||
|
||||
std::string encrypted;
|
||||
encrypted.resize(aes_nonce_.size() + packet.payload.size());
|
||||
encrypted.resize(aes_nonce_.size() + packet->payload.size());
|
||||
memcpy(encrypted.data(), nonce.data(), nonce.size());
|
||||
|
||||
size_t nc_off = 0;
|
||||
uint8_t stream_block[16] = {0};
|
||||
if (mbedtls_aes_crypt_ctr(&aes_ctx_, packet.payload.size(), &nc_off, (uint8_t*)nonce.c_str(), stream_block,
|
||||
(uint8_t*)packet.payload.data(), (uint8_t*)&encrypted[nonce.size()]) != 0) {
|
||||
if (mbedtls_aes_crypt_ctr(&aes_ctx_, packet->payload.size(), &nc_off, (uint8_t*)nonce.c_str(), stream_block,
|
||||
(uint8_t*)packet->payload.data(), (uint8_t*)&encrypted[nonce.size()]) != 0) {
|
||||
ESP_LOGE(TAG, "Failed to encrypt audio data");
|
||||
return false;
|
||||
}
|
||||
@@ -228,12 +228,12 @@ bool MqttProtocol::OpenAudioChannel() {
|
||||
uint8_t stream_block[16] = {0};
|
||||
auto nonce = (uint8_t*)data.data();
|
||||
auto encrypted = (uint8_t*)data.data() + aes_nonce_.size();
|
||||
AudioStreamPacket packet;
|
||||
packet.sample_rate = server_sample_rate_;
|
||||
packet.frame_duration = server_frame_duration_;
|
||||
packet.timestamp = timestamp;
|
||||
packet.payload.resize(decrypted_size);
|
||||
int ret = mbedtls_aes_crypt_ctr(&aes_ctx_, decrypted_size, &nc_off, nonce, stream_block, encrypted, (uint8_t*)packet.payload.data());
|
||||
auto packet = std::make_unique<AudioStreamPacket>();
|
||||
packet->sample_rate = server_sample_rate_;
|
||||
packet->frame_duration = server_frame_duration_;
|
||||
packet->timestamp = timestamp;
|
||||
packet->payload.resize(decrypted_size);
|
||||
int ret = mbedtls_aes_crypt_ctr(&aes_ctx_, decrypted_size, &nc_off, nonce, stream_block, encrypted, (uint8_t*)packet->payload.data());
|
||||
if (ret != 0) {
|
||||
ESP_LOGE(TAG, "Failed to decrypt audio data, ret: %d", ret);
|
||||
return;
|
||||
@@ -263,9 +263,7 @@ std::string MqttProtocol::GetHelloMessage() {
|
||||
#if CONFIG_USE_SERVER_AEC
|
||||
cJSON_AddBoolToObject(features, "aec", true);
|
||||
#endif
|
||||
#if CONFIG_IOT_PROTOCOL_MCP
|
||||
cJSON_AddBoolToObject(features, "mcp", true);
|
||||
#endif
|
||||
cJSON_AddItemToObject(root, "features", features);
|
||||
cJSON* audio_params = cJSON_CreateObject();
|
||||
cJSON_AddStringToObject(audio_params, "format", "opus");
|
||||
|
||||
@@ -26,7 +26,7 @@ public:
|
||||
~MqttProtocol();
|
||||
|
||||
bool Start() override;
|
||||
bool SendAudio(const AudioStreamPacket& packet) override;
|
||||
bool SendAudio(std::unique_ptr<AudioStreamPacket> packet) override;
|
||||
bool OpenAudioChannel() override;
|
||||
void CloseAudioChannel() override;
|
||||
bool IsAudioChannelOpened() const override;
|
||||
|
||||
@@ -8,7 +8,7 @@ void Protocol::OnIncomingJson(std::function<void(const cJSON* root)> callback) {
|
||||
on_incoming_json_ = callback;
|
||||
}
|
||||
|
||||
void Protocol::OnIncomingAudio(std::function<void(AudioStreamPacket&& packet)> callback) {
|
||||
void Protocol::OnIncomingAudio(std::function<void(std::unique_ptr<AudioStreamPacket> packet)> callback) {
|
||||
on_incoming_audio_ = callback;
|
||||
}
|
||||
|
||||
@@ -65,56 +65,6 @@ void Protocol::SendStopListening() {
|
||||
SendText(message);
|
||||
}
|
||||
|
||||
void Protocol::SendIotDescriptors(const std::string& descriptors) {
|
||||
cJSON* root = cJSON_Parse(descriptors.c_str());
|
||||
if (root == nullptr) {
|
||||
ESP_LOGE(TAG, "Failed to parse IoT descriptors: %s", descriptors.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
if (!cJSON_IsArray(root)) {
|
||||
ESP_LOGE(TAG, "IoT descriptors should be an array");
|
||||
cJSON_Delete(root);
|
||||
return;
|
||||
}
|
||||
|
||||
int arraySize = cJSON_GetArraySize(root);
|
||||
for (int i = 0; i < arraySize; ++i) {
|
||||
cJSON* descriptor = cJSON_GetArrayItem(root, i);
|
||||
if (descriptor == nullptr) {
|
||||
ESP_LOGE(TAG, "Failed to get IoT descriptor at index %d", i);
|
||||
continue;
|
||||
}
|
||||
|
||||
cJSON* messageRoot = cJSON_CreateObject();
|
||||
cJSON_AddStringToObject(messageRoot, "session_id", session_id_.c_str());
|
||||
cJSON_AddStringToObject(messageRoot, "type", "iot");
|
||||
cJSON_AddBoolToObject(messageRoot, "update", true);
|
||||
|
||||
cJSON* descriptorArray = cJSON_CreateArray();
|
||||
cJSON_AddItemToArray(descriptorArray, cJSON_Duplicate(descriptor, 1));
|
||||
cJSON_AddItemToObject(messageRoot, "descriptors", descriptorArray);
|
||||
|
||||
char* message = cJSON_PrintUnformatted(messageRoot);
|
||||
if (message == nullptr) {
|
||||
ESP_LOGE(TAG, "Failed to print JSON message for IoT descriptor at index %d", i);
|
||||
cJSON_Delete(messageRoot);
|
||||
continue;
|
||||
}
|
||||
|
||||
SendText(std::string(message));
|
||||
cJSON_free(message);
|
||||
cJSON_Delete(messageRoot);
|
||||
}
|
||||
|
||||
cJSON_Delete(root);
|
||||
}
|
||||
|
||||
void Protocol::SendIotStates(const std::string& states) {
|
||||
std::string message = "{\"session_id\":\"" + session_id_ + "\",\"type\":\"iot\",\"update\":true,\"states\":" + states + "}";
|
||||
SendText(message);
|
||||
}
|
||||
|
||||
void Protocol::SendMcpMessage(const std::string& payload) {
|
||||
std::string message = "{\"session_id\":\"" + session_id_ + "\",\"type\":\"mcp\",\"payload\":" + payload + "}";
|
||||
SendText(message);
|
||||
|
||||
@@ -55,7 +55,7 @@ public:
|
||||
return session_id_;
|
||||
}
|
||||
|
||||
void OnIncomingAudio(std::function<void(AudioStreamPacket&& packet)> callback);
|
||||
void OnIncomingAudio(std::function<void(std::unique_ptr<AudioStreamPacket> packet)> callback);
|
||||
void OnIncomingJson(std::function<void(const cJSON* root)> callback);
|
||||
void OnAudioChannelOpened(std::function<void()> callback);
|
||||
void OnAudioChannelClosed(std::function<void()> callback);
|
||||
@@ -65,18 +65,16 @@ public:
|
||||
virtual bool OpenAudioChannel() = 0;
|
||||
virtual void CloseAudioChannel() = 0;
|
||||
virtual bool IsAudioChannelOpened() const = 0;
|
||||
virtual bool SendAudio(const AudioStreamPacket& packet) = 0;
|
||||
virtual bool SendAudio(std::unique_ptr<AudioStreamPacket> packet) = 0;
|
||||
virtual void SendWakeWordDetected(const std::string& wake_word);
|
||||
virtual void SendStartListening(ListeningMode mode);
|
||||
virtual void SendStopListening();
|
||||
virtual void SendAbortSpeaking(AbortReason reason);
|
||||
virtual void SendIotDescriptors(const std::string& descriptors);
|
||||
virtual void SendIotStates(const std::string& states);
|
||||
virtual void SendMcpMessage(const std::string& message);
|
||||
|
||||
protected:
|
||||
std::function<void(const cJSON* root)> on_incoming_json_;
|
||||
std::function<void(AudioStreamPacket&& packet)> on_incoming_audio_;
|
||||
std::function<void(std::unique_ptr<AudioStreamPacket> packet)> on_incoming_audio_;
|
||||
std::function<void()> on_audio_channel_opened_;
|
||||
std::function<void()> on_audio_channel_closed_;
|
||||
std::function<void(const std::string& message)> on_network_error_;
|
||||
|
||||
@@ -28,35 +28,35 @@ bool WebsocketProtocol::Start() {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool WebsocketProtocol::SendAudio(const AudioStreamPacket& packet) {
|
||||
bool WebsocketProtocol::SendAudio(std::unique_ptr<AudioStreamPacket> packet) {
|
||||
if (websocket_ == nullptr || !websocket_->IsConnected()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (version_ == 2) {
|
||||
std::string serialized;
|
||||
serialized.resize(sizeof(BinaryProtocol2) + packet.payload.size());
|
||||
serialized.resize(sizeof(BinaryProtocol2) + packet->payload.size());
|
||||
auto bp2 = (BinaryProtocol2*)serialized.data();
|
||||
bp2->version = htons(version_);
|
||||
bp2->type = 0;
|
||||
bp2->reserved = 0;
|
||||
bp2->timestamp = htonl(packet.timestamp);
|
||||
bp2->payload_size = htonl(packet.payload.size());
|
||||
memcpy(bp2->payload, packet.payload.data(), packet.payload.size());
|
||||
bp2->timestamp = htonl(packet->timestamp);
|
||||
bp2->payload_size = htonl(packet->payload.size());
|
||||
memcpy(bp2->payload, packet->payload.data(), packet->payload.size());
|
||||
|
||||
return websocket_->Send(serialized.data(), serialized.size(), true);
|
||||
} else if (version_ == 3) {
|
||||
std::string serialized;
|
||||
serialized.resize(sizeof(BinaryProtocol3) + packet.payload.size());
|
||||
serialized.resize(sizeof(BinaryProtocol3) + packet->payload.size());
|
||||
auto bp3 = (BinaryProtocol3*)serialized.data();
|
||||
bp3->type = 0;
|
||||
bp3->reserved = 0;
|
||||
bp3->payload_size = htons(packet.payload.size());
|
||||
memcpy(bp3->payload, packet.payload.data(), packet.payload.size());
|
||||
bp3->payload_size = htons(packet->payload.size());
|
||||
memcpy(bp3->payload, packet->payload.data(), packet->payload.size());
|
||||
|
||||
return websocket_->Send(serialized.data(), serialized.size(), true);
|
||||
} else {
|
||||
return websocket_->Send(packet.payload.data(), packet.payload.size(), true);
|
||||
return websocket_->Send(packet->payload.data(), packet->payload.size(), true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,30 +124,30 @@ bool WebsocketProtocol::OpenAudioChannel() {
|
||||
bp2->timestamp = ntohl(bp2->timestamp);
|
||||
bp2->payload_size = ntohl(bp2->payload_size);
|
||||
auto payload = (uint8_t*)bp2->payload;
|
||||
on_incoming_audio_(AudioStreamPacket{
|
||||
on_incoming_audio_(std::make_unique<AudioStreamPacket>(AudioStreamPacket{
|
||||
.sample_rate = server_sample_rate_,
|
||||
.frame_duration = server_frame_duration_,
|
||||
.timestamp = bp2->timestamp,
|
||||
.payload = std::vector<uint8_t>(payload, payload + bp2->payload_size)
|
||||
});
|
||||
}));
|
||||
} else if (version_ == 3) {
|
||||
BinaryProtocol3* bp3 = (BinaryProtocol3*)data;
|
||||
bp3->type = bp3->type;
|
||||
bp3->payload_size = ntohs(bp3->payload_size);
|
||||
auto payload = (uint8_t*)bp3->payload;
|
||||
on_incoming_audio_(AudioStreamPacket{
|
||||
on_incoming_audio_(std::make_unique<AudioStreamPacket>(AudioStreamPacket{
|
||||
.sample_rate = server_sample_rate_,
|
||||
.frame_duration = server_frame_duration_,
|
||||
.timestamp = 0,
|
||||
.payload = std::vector<uint8_t>(payload, payload + bp3->payload_size)
|
||||
});
|
||||
}));
|
||||
} else {
|
||||
on_incoming_audio_(AudioStreamPacket{
|
||||
on_incoming_audio_(std::make_unique<AudioStreamPacket>(AudioStreamPacket{
|
||||
.sample_rate = server_sample_rate_,
|
||||
.frame_duration = server_frame_duration_,
|
||||
.timestamp = 0,
|
||||
.payload = std::vector<uint8_t>((uint8_t*)data, (uint8_t*)data + len)
|
||||
});
|
||||
}));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -214,9 +214,7 @@ std::string WebsocketProtocol::GetHelloMessage() {
|
||||
#if CONFIG_USE_SERVER_AEC
|
||||
cJSON_AddBoolToObject(features, "aec", true);
|
||||
#endif
|
||||
#if CONFIG_IOT_PROTOCOL_MCP
|
||||
cJSON_AddBoolToObject(features, "mcp", true);
|
||||
#endif
|
||||
cJSON_AddItemToObject(root, "features", features);
|
||||
cJSON_AddStringToObject(root, "transport", "websocket");
|
||||
cJSON* audio_params = cJSON_CreateObject();
|
||||
|
||||
@@ -16,7 +16,7 @@ public:
|
||||
~WebsocketProtocol();
|
||||
|
||||
bool Start() override;
|
||||
bool SendAudio(const AudioStreamPacket& packet) override;
|
||||
bool SendAudio(std::unique_ptr<AudioStreamPacket> packet) override;
|
||||
bool OpenAudioChannel() override;
|
||||
void CloseAudioChannel() override;
|
||||
bool IsAudioChannelOpened() const override;
|
||||
|
||||
Reference in New Issue
Block a user