forked from xiaozhi/xiaozhi-esp32
Add binary protocol v2 & v3 to websocket
This commit is contained in:
@@ -154,6 +154,8 @@ bool Ota::CheckVersion() {
|
|||||||
cJSON_ArrayForEach(item, websocket) {
|
cJSON_ArrayForEach(item, websocket) {
|
||||||
if (item->type == cJSON_String) {
|
if (item->type == cJSON_String) {
|
||||||
settings.SetString(item->string, item->valuestring);
|
settings.SetString(item->string, item->valuestring);
|
||||||
|
} else if (item->type == cJSON_Number) {
|
||||||
|
settings.SetInt(item->string, item->valueint);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
has_websocket_config_ = true;
|
has_websocket_config_ = true;
|
||||||
|
|||||||
@@ -207,6 +207,11 @@ bool MqttProtocol::OpenAudioChannel() {
|
|||||||
}
|
}
|
||||||
udp_ = Board::GetInstance().CreateUdp();
|
udp_ = Board::GetInstance().CreateUdp();
|
||||||
udp_->OnMessage([this](const std::string& data) {
|
udp_->OnMessage([this](const std::string& data) {
|
||||||
|
/*
|
||||||
|
* UDP Encrypted OPUS Packet Format:
|
||||||
|
* |type 1u|flags 1u|payload_len 2u|ssrc 4u|timestamp 4u|sequence 4u|
|
||||||
|
* |payload payload_len|
|
||||||
|
*/
|
||||||
if (data.size() < sizeof(aes_nonce_)) {
|
if (data.size() < sizeof(aes_nonce_)) {
|
||||||
ESP_LOGE(TAG, "Invalid audio packet size: %zu", data.size());
|
ESP_LOGE(TAG, "Invalid audio packet size: %zu", data.size());
|
||||||
return;
|
return;
|
||||||
|
|||||||
@@ -5,6 +5,21 @@
|
|||||||
#include <string>
|
#include <string>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
// One unit of streamed audio: a 32-bit timestamp paired with its payload bytes.
struct AudioStreamPacket {
|
||||||
|
uint32_t timestamp; // NOTE(review): units are not shown here; presumably milliseconds like BinaryProtocol2::timestamp, confirm
|
||||||
|
std::vector<uint8_t> payload; // Payload bytes, owned by the packet
|
||||||
|
};
|
||||||
|
|
||||||
|
// Frame header for websocket binary protocol version 2.
// The struct is packed, so the fixed header is exactly 16 bytes
// (2 + 2 + 4 + 4 + 4) and the payload bytes follow it immediately.
// Multi-byte fields are carried in network byte order: the sender fills them
// with htons()/htonl() and the receiver converts them back with ntohs()/ntohl().
struct BinaryProtocol2 {
|
||||||
|
uint16_t version; // Protocol version; the sender writes htons(version_)
|
||||||
|
uint16_t type; // Message type (0: OPUS, 1: JSON)
|
||||||
|
uint32_t reserved; // Reserved for future use
|
||||||
|
uint32_t timestamp; // Timestamp in milliseconds (used for server-side AEC)
|
||||||
|
uint32_t payload_size; // Payload size in bytes
|
||||||
|
uint8_t payload[]; // Payload data (flexible array member; adds nothing to sizeof)
|
||||||
|
} __attribute__((packed));
|
||||||
|
|
||||||
struct BinaryProtocol3 {
|
struct BinaryProtocol3 {
|
||||||
uint8_t type;
|
uint8_t type;
|
||||||
|
|||||||
@@ -33,9 +33,37 @@ void WebsocketProtocol::SendAudio(const std::vector<uint8_t>& data) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
busy_sending_audio_ = true;
|
if (version_ == 2) {
|
||||||
websocket_->Send(data.data(), data.size(), true);
|
std::string packet;
|
||||||
busy_sending_audio_ = false;
|
packet.resize(sizeof(BinaryProtocol2) + data.size());
|
||||||
|
auto bp2 = (BinaryProtocol2*)packet.data();
|
||||||
|
bp2->version = htons(version_);
|
||||||
|
bp2->type = 0;
|
||||||
|
bp2->reserved = 0;
|
||||||
|
bp2->timestamp = htonl(0);
|
||||||
|
bp2->payload_size = htonl(data.size());
|
||||||
|
memcpy(bp2->payload, data.data(), data.size());
|
||||||
|
|
||||||
|
busy_sending_audio_ = true;
|
||||||
|
websocket_->Send(packet.data(), packet.size(), true);
|
||||||
|
busy_sending_audio_ = false;
|
||||||
|
} else if (version_ == 3) {
|
||||||
|
std::string packet;
|
||||||
|
packet.resize(sizeof(BinaryProtocol3) + data.size());
|
||||||
|
auto bp3 = (BinaryProtocol3*)packet.data();
|
||||||
|
bp3->type = 0;
|
||||||
|
bp3->reserved = 0;
|
||||||
|
bp3->payload_size = htons(data.size());
|
||||||
|
memcpy(bp3->payload, data.data(), data.size());
|
||||||
|
|
||||||
|
busy_sending_audio_ = true;
|
||||||
|
websocket_->Send(packet.data(), packet.size(), true);
|
||||||
|
busy_sending_audio_ = false;
|
||||||
|
} else {
|
||||||
|
busy_sending_audio_ = true;
|
||||||
|
websocket_->Send(data.data(), data.size(), true);
|
||||||
|
busy_sending_audio_ = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WebsocketProtocol::SendText(const std::string& text) {
|
bool WebsocketProtocol::SendText(const std::string& text) {
|
||||||
@@ -71,25 +99,47 @@ bool WebsocketProtocol::OpenAudioChannel() {
|
|||||||
Settings settings("websocket", false);
|
Settings settings("websocket", false);
|
||||||
std::string url = settings.GetString("url");
|
std::string url = settings.GetString("url");
|
||||||
std::string token = settings.GetString("token");
|
std::string token = settings.GetString("token");
|
||||||
|
int version = settings.GetInt("version");
|
||||||
|
if (version != 0) {
|
||||||
|
version_ = version;
|
||||||
|
}
|
||||||
|
|
||||||
busy_sending_audio_ = false;
|
busy_sending_audio_ = false;
|
||||||
error_occurred_ = false;
|
error_occurred_ = false;
|
||||||
|
|
||||||
// If token not starts with "Bearer " or "bearer ", add it
|
|
||||||
if (token.empty() || (token.find("Bearer ") != 0 && token.find("bearer ") != 0)) {
|
|
||||||
token = "Bearer " + token;
|
|
||||||
}
|
|
||||||
|
|
||||||
websocket_ = Board::GetInstance().CreateWebSocket();
|
websocket_ = Board::GetInstance().CreateWebSocket();
|
||||||
websocket_->SetHeader("Authorization", token.c_str());
|
|
||||||
websocket_->SetHeader("Protocol-Version", "1");
|
if (!token.empty()) {
|
||||||
|
// If the token contains no space, add the "Bearer " prefix
|
||||||
|
if (token.find(" ") == std::string::npos) {
|
||||||
|
token = "Bearer " + token;
|
||||||
|
}
|
||||||
|
websocket_->SetHeader("Authorization", token.c_str());
|
||||||
|
}
|
||||||
|
websocket_->SetHeader("Protocol-Version", std::to_string(version_).c_str());
|
||||||
websocket_->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
|
websocket_->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
|
||||||
websocket_->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str());
|
websocket_->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str());
|
||||||
|
|
||||||
websocket_->OnData([this](const char* data, size_t len, bool binary) {
|
websocket_->OnData([this](const char* data, size_t len, bool binary) {
|
||||||
if (binary) {
|
if (binary) {
|
||||||
if (on_incoming_audio_ != nullptr) {
|
if (on_incoming_audio_ != nullptr) {
|
||||||
on_incoming_audio_(std::vector<uint8_t>((uint8_t*)data, (uint8_t*)data + len));
|
if (version_ == 2) {
|
||||||
|
BinaryProtocol2* bp2 = (BinaryProtocol2*)data;
|
||||||
|
bp2->version = ntohs(bp2->version);
|
||||||
|
bp2->type = ntohs(bp2->type);
|
||||||
|
bp2->timestamp = ntohl(bp2->timestamp);
|
||||||
|
bp2->payload_size = ntohl(bp2->payload_size);
|
||||||
|
auto payload = (uint8_t*)bp2->payload;
|
||||||
|
on_incoming_audio_(std::vector<uint8_t>(payload, payload + bp2->payload_size));
|
||||||
|
} else if (version_ == 3) {
|
||||||
|
BinaryProtocol3* bp3 = (BinaryProtocol3*)data;
|
||||||
|
bp3->type = bp3->type;
|
||||||
|
bp3->payload_size = ntohs(bp3->payload_size);
|
||||||
|
auto payload = (uint8_t*)bp3->payload;
|
||||||
|
on_incoming_audio_(std::vector<uint8_t>(payload, payload + bp3->payload_size));
|
||||||
|
} else {
|
||||||
|
on_incoming_audio_(std::vector<uint8_t>((uint8_t*)data, (uint8_t*)data + len));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Parse JSON data
|
// Parse JSON data
|
||||||
@@ -118,7 +168,7 @@ bool WebsocketProtocol::OpenAudioChannel() {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
ESP_LOGI(TAG, "Connecting to websocket server: %s with token: %s", url.c_str(), token.c_str());
|
ESP_LOGI(TAG, "Connecting to websocket server: %s with version: %d", url.c_str(), version_);
|
||||||
if (!websocket_->Connect(url.c_str())) {
|
if (!websocket_->Connect(url.c_str())) {
|
||||||
ESP_LOGE(TAG, "Failed to connect to websocket server");
|
ESP_LOGE(TAG, "Failed to connect to websocket server");
|
||||||
SetError(Lang::Strings::SERVER_NOT_FOUND);
|
SetError(Lang::Strings::SERVER_NOT_FOUND);
|
||||||
@@ -129,7 +179,7 @@ bool WebsocketProtocol::OpenAudioChannel() {
|
|||||||
// keys: message type, version, audio_params (format, sample_rate, channels)
|
// keys: message type, version, audio_params (format, sample_rate, channels)
|
||||||
std::string message = "{";
|
std::string message = "{";
|
||||||
message += "\"type\":\"hello\",";
|
message += "\"type\":\"hello\",";
|
||||||
message += "\"version\": 1,";
|
message += "\"version\": " + std::to_string(version_) + ",";
|
||||||
message += "\"transport\":\"websocket\",";
|
message += "\"transport\":\"websocket\",";
|
||||||
message += "\"audio_params\":{";
|
message += "\"audio_params\":{";
|
||||||
message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":" + std::to_string(OPUS_FRAME_DURATION_MS);
|
message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":" + std::to_string(OPUS_FRAME_DURATION_MS);
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ public:
|
|||||||
private:
|
private:
|
||||||
EventGroupHandle_t event_group_handle_;
|
EventGroupHandle_t event_group_handle_;
|
||||||
WebSocket* websocket_ = nullptr;
|
WebSocket* websocket_ = nullptr;
|
||||||
|
int version_ = 1;
|
||||||
|
|
||||||
void ParseServerHello(const cJSON* root);
|
void ParseServerHello(const cJSON* root);
|
||||||
bool SendText(const std::string& text) override;
|
bool SendText(const std::string& text) override;
|
||||||
|
|||||||
Reference in New Issue
Block a user