2024-11-16 05:49:35 +08:00
|
|
|
#include "websocket_protocol.h"
|
|
|
|
|
#include "board.h"
|
|
|
|
|
#include "system_info.h"
|
|
|
|
|
#include "application.h"
|
2025-04-21 15:12:52 +08:00
|
|
|
#include "settings.h"
|
2024-11-16 05:49:35 +08:00
|
|
|
|
|
|
|
|
#include <cstring>
|
|
|
|
|
#include <cJSON.h>
|
|
|
|
|
#include <esp_log.h>
|
|
|
|
|
#include <arpa/inet.h>
|
2025-02-18 19:33:07 +08:00
|
|
|
#include "assets/lang_config.h"
|
2024-11-16 05:49:35 +08:00
|
|
|
|
|
|
|
|
#define TAG "WS"
|
|
|
|
|
|
|
|
|
|
// Creates the FreeRTOS event group that OpenAudioChannel() later waits on
// for the server hello notification.
WebsocketProtocol::WebsocketProtocol() {
    this->event_group_handle_ = xEventGroupCreate();
}
|
|
|
|
|
|
|
|
|
|
// Releases the websocket (if one was ever created) and the event group.
WebsocketProtocol::~WebsocketProtocol() {
    // Deleting a null pointer is a no-op, so no explicit guard is required.
    delete websocket_;
    vEventGroupDelete(event_group_handle_);
}
|
|
|
|
|
|
2025-04-21 06:54:50 +08:00
|
|
|
// No connection is made here: the server is only contacted once an audio
// channel is actually needed (see OpenAudioChannel()). Always returns true.
bool WebsocketProtocol::Start() {
    return true;
}
|
|
|
|
|
|
2025-04-28 23:10:24 +08:00
|
|
|
void WebsocketProtocol::SendAudio(const AudioStreamPacket& packet) {
|
2024-11-16 05:49:35 +08:00
|
|
|
if (websocket_ == nullptr) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2025-04-28 16:29:33 +08:00
|
|
|
if (version_ == 2) {
|
2025-04-28 23:10:24 +08:00
|
|
|
std::string serialized;
|
|
|
|
|
serialized.resize(sizeof(BinaryProtocol2) + packet.payload.size());
|
|
|
|
|
auto bp2 = (BinaryProtocol2*)serialized.data();
|
2025-04-28 16:29:33 +08:00
|
|
|
bp2->version = htons(version_);
|
|
|
|
|
bp2->type = 0;
|
|
|
|
|
bp2->reserved = 0;
|
2025-04-28 23:10:24 +08:00
|
|
|
bp2->timestamp = htonl(packet.timestamp);
|
|
|
|
|
bp2->payload_size = htonl(packet.payload.size());
|
|
|
|
|
memcpy(bp2->payload, packet.payload.data(), packet.payload.size());
|
2025-04-28 16:29:33 +08:00
|
|
|
|
|
|
|
|
busy_sending_audio_ = true;
|
2025-04-28 23:10:24 +08:00
|
|
|
websocket_->Send(serialized.data(), serialized.size(), true);
|
2025-04-28 16:29:33 +08:00
|
|
|
busy_sending_audio_ = false;
|
|
|
|
|
} else if (version_ == 3) {
|
2025-04-28 23:10:24 +08:00
|
|
|
std::string serialized;
|
|
|
|
|
serialized.resize(sizeof(BinaryProtocol3) + packet.payload.size());
|
|
|
|
|
auto bp3 = (BinaryProtocol3*)serialized.data();
|
2025-04-28 16:29:33 +08:00
|
|
|
bp3->type = 0;
|
|
|
|
|
bp3->reserved = 0;
|
2025-04-28 23:10:24 +08:00
|
|
|
bp3->payload_size = htons(packet.payload.size());
|
|
|
|
|
memcpy(bp3->payload, packet.payload.data(), packet.payload.size());
|
2025-04-28 16:29:33 +08:00
|
|
|
|
|
|
|
|
busy_sending_audio_ = true;
|
2025-04-28 23:10:24 +08:00
|
|
|
websocket_->Send(serialized.data(), serialized.size(), true);
|
2025-04-28 16:29:33 +08:00
|
|
|
busy_sending_audio_ = false;
|
|
|
|
|
} else {
|
|
|
|
|
busy_sending_audio_ = true;
|
2025-04-28 23:10:24 +08:00
|
|
|
websocket_->Send(packet.payload.data(), packet.payload.size(), true);
|
2025-04-28 16:29:33 +08:00
|
|
|
busy_sending_audio_ = false;
|
|
|
|
|
}
|
2024-11-16 05:49:35 +08:00
|
|
|
}
|
|
|
|
|
|
2025-04-09 09:13:18 +08:00
|
|
|
bool WebsocketProtocol::SendText(const std::string& text) {
|
2024-11-16 05:49:35 +08:00
|
|
|
if (websocket_ == nullptr) {
|
2025-04-09 09:13:18 +08:00
|
|
|
return false;
|
2024-11-16 05:49:35 +08:00
|
|
|
}
|
|
|
|
|
|
2025-03-04 05:32:11 +08:00
|
|
|
if (!websocket_->Send(text)) {
|
|
|
|
|
ESP_LOGE(TAG, "Failed to send text: %s", text.c_str());
|
|
|
|
|
SetError(Lang::Strings::SERVER_ERROR);
|
2025-04-09 09:13:18 +08:00
|
|
|
return false;
|
2025-03-04 05:32:11 +08:00
|
|
|
}
|
2025-04-09 09:13:18 +08:00
|
|
|
|
|
|
|
|
return true;
|
2024-11-16 05:49:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WebsocketProtocol::IsAudioChannelOpened() const {
|
2025-03-04 05:32:11 +08:00
|
|
|
return websocket_ != nullptr && websocket_->IsConnected() && !error_occurred_ && !IsTimeout();
|
2024-11-16 05:49:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void WebsocketProtocol::CloseAudioChannel() {
|
|
|
|
|
if (websocket_ != nullptr) {
|
|
|
|
|
delete websocket_;
|
|
|
|
|
websocket_ = nullptr;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool WebsocketProtocol::OpenAudioChannel() {
|
|
|
|
|
if (websocket_ != nullptr) {
|
|
|
|
|
delete websocket_;
|
|
|
|
|
}
|
|
|
|
|
|
2025-04-21 15:12:52 +08:00
|
|
|
Settings settings("websocket", false);
|
|
|
|
|
std::string url = settings.GetString("url");
|
|
|
|
|
std::string token = settings.GetString("token");
|
2025-04-28 16:29:33 +08:00
|
|
|
int version = settings.GetInt("version");
|
|
|
|
|
if (version != 0) {
|
|
|
|
|
version_ = version;
|
|
|
|
|
}
|
2025-04-21 15:12:52 +08:00
|
|
|
|
2025-04-13 23:12:44 +08:00
|
|
|
busy_sending_audio_ = false;
|
2025-03-04 05:32:11 +08:00
|
|
|
error_occurred_ = false;
|
2025-04-21 15:12:52 +08:00
|
|
|
|
2024-11-16 05:49:35 +08:00
|
|
|
websocket_ = Board::GetInstance().CreateWebSocket();
|
2025-04-28 16:29:33 +08:00
|
|
|
|
|
|
|
|
if (!token.empty()) {
|
|
|
|
|
// If token not has a space, add "Bearer " prefix
|
|
|
|
|
if (token.find(" ") == std::string::npos) {
|
|
|
|
|
token = "Bearer " + token;
|
|
|
|
|
}
|
|
|
|
|
websocket_->SetHeader("Authorization", token.c_str());
|
|
|
|
|
}
|
|
|
|
|
websocket_->SetHeader("Protocol-Version", std::to_string(version_).c_str());
|
2024-11-16 05:49:35 +08:00
|
|
|
websocket_->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str());
|
2025-02-16 06:59:19 +08:00
|
|
|
websocket_->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str());
|
2024-11-16 05:49:35 +08:00
|
|
|
|
|
|
|
|
websocket_->OnData([this](const char* data, size_t len, bool binary) {
|
|
|
|
|
if (binary) {
|
|
|
|
|
if (on_incoming_audio_ != nullptr) {
|
2025-04-28 16:29:33 +08:00
|
|
|
if (version_ == 2) {
|
|
|
|
|
BinaryProtocol2* bp2 = (BinaryProtocol2*)data;
|
|
|
|
|
bp2->version = ntohs(bp2->version);
|
|
|
|
|
bp2->type = ntohs(bp2->type);
|
|
|
|
|
bp2->timestamp = ntohl(bp2->timestamp);
|
|
|
|
|
bp2->payload_size = ntohl(bp2->payload_size);
|
|
|
|
|
auto payload = (uint8_t*)bp2->payload;
|
2025-04-28 23:10:24 +08:00
|
|
|
on_incoming_audio_(AudioStreamPacket{
|
|
|
|
|
.timestamp = bp2->timestamp,
|
|
|
|
|
.payload = std::vector<uint8_t>(payload, payload + bp2->payload_size)
|
|
|
|
|
});
|
2025-04-28 16:29:33 +08:00
|
|
|
} else if (version_ == 3) {
|
|
|
|
|
BinaryProtocol3* bp3 = (BinaryProtocol3*)data;
|
|
|
|
|
bp3->type = bp3->type;
|
|
|
|
|
bp3->payload_size = ntohs(bp3->payload_size);
|
|
|
|
|
auto payload = (uint8_t*)bp3->payload;
|
2025-04-28 23:10:24 +08:00
|
|
|
on_incoming_audio_(AudioStreamPacket{
|
|
|
|
|
.timestamp = 0,
|
|
|
|
|
.payload = std::vector<uint8_t>(payload, payload + bp3->payload_size)
|
|
|
|
|
});
|
2025-04-28 16:29:33 +08:00
|
|
|
} else {
|
2025-04-28 23:10:24 +08:00
|
|
|
on_incoming_audio_(AudioStreamPacket{
|
|
|
|
|
.timestamp = 0,
|
|
|
|
|
.payload = std::vector<uint8_t>((uint8_t*)data, (uint8_t*)data + len)
|
|
|
|
|
});
|
2025-04-28 16:29:33 +08:00
|
|
|
}
|
2024-11-16 05:49:35 +08:00
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
// Parse JSON data
|
|
|
|
|
auto root = cJSON_Parse(data);
|
|
|
|
|
auto type = cJSON_GetObjectItem(root, "type");
|
2025-05-21 15:59:27 +08:00
|
|
|
if (cJSON_IsString(type)) {
|
2024-11-16 05:49:35 +08:00
|
|
|
if (strcmp(type->valuestring, "hello") == 0) {
|
|
|
|
|
ParseServerHello(root);
|
|
|
|
|
} else {
|
|
|
|
|
if (on_incoming_json_ != nullptr) {
|
|
|
|
|
on_incoming_json_(root);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
ESP_LOGE(TAG, "Missing message type, data: %s", data);
|
|
|
|
|
}
|
|
|
|
|
cJSON_Delete(root);
|
|
|
|
|
}
|
2025-03-04 05:32:11 +08:00
|
|
|
last_incoming_time_ = std::chrono::steady_clock::now();
|
2024-11-16 05:49:35 +08:00
|
|
|
});
|
|
|
|
|
|
|
|
|
|
websocket_->OnDisconnected([this]() {
|
|
|
|
|
ESP_LOGI(TAG, "Websocket disconnected");
|
|
|
|
|
if (on_audio_channel_closed_ != nullptr) {
|
|
|
|
|
on_audio_channel_closed_();
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
2025-04-28 16:29:33 +08:00
|
|
|
ESP_LOGI(TAG, "Connecting to websocket server: %s with version: %d", url.c_str(), version_);
|
2024-11-16 05:49:35 +08:00
|
|
|
if (!websocket_->Connect(url.c_str())) {
|
|
|
|
|
ESP_LOGE(TAG, "Failed to connect to websocket server");
|
2025-03-04 05:32:11 +08:00
|
|
|
SetError(Lang::Strings::SERVER_NOT_FOUND);
|
2024-11-16 05:49:35 +08:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Send hello message to describe the client
|
|
|
|
|
// keys: message type, version, audio_params (format, sample_rate, channels)
|
|
|
|
|
std::string message = "{";
|
|
|
|
|
message += "\"type\":\"hello\",";
|
2025-04-28 16:29:33 +08:00
|
|
|
message += "\"version\": " + std::to_string(version_) + ",";
|
2025-05-09 14:00:26 +08:00
|
|
|
#if CONFIG_USE_SERVER_AEC
|
|
|
|
|
message += "\"features\":{\"aec\":true},";
|
|
|
|
|
#endif
|
2024-11-16 05:49:35 +08:00
|
|
|
message += "\"transport\":\"websocket\",";
|
|
|
|
|
message += "\"audio_params\":{";
|
|
|
|
|
message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":" + std::to_string(OPUS_FRAME_DURATION_MS);
|
|
|
|
|
message += "}}";
|
2025-04-09 09:13:18 +08:00
|
|
|
if (!SendText(message)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2024-11-16 05:49:35 +08:00
|
|
|
|
|
|
|
|
// Wait for server hello
|
|
|
|
|
EventBits_t bits = xEventGroupWaitBits(event_group_handle_, WEBSOCKET_PROTOCOL_SERVER_HELLO_EVENT, pdTRUE, pdFALSE, pdMS_TO_TICKS(10000));
|
|
|
|
|
if (!(bits & WEBSOCKET_PROTOCOL_SERVER_HELLO_EVENT)) {
|
|
|
|
|
ESP_LOGE(TAG, "Failed to receive server hello");
|
2025-03-04 05:32:11 +08:00
|
|
|
SetError(Lang::Strings::SERVER_TIMEOUT);
|
2024-11-16 05:49:35 +08:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (on_audio_channel_opened_ != nullptr) {
|
|
|
|
|
on_audio_channel_opened_();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void WebsocketProtocol::ParseServerHello(const cJSON* root) {
|
|
|
|
|
auto transport = cJSON_GetObjectItem(root, "transport");
|
|
|
|
|
if (transport == nullptr || strcmp(transport->valuestring, "websocket") != 0) {
|
|
|
|
|
ESP_LOGE(TAG, "Unsupported transport: %s", transport->valuestring);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-13 17:16:18 +08:00
|
|
|
auto session_id = cJSON_GetObjectItem(root, "session_id");
|
2025-05-21 15:59:27 +08:00
|
|
|
if (cJSON_IsString(session_id)) {
|
2025-05-13 17:16:18 +08:00
|
|
|
session_id_ = session_id->valuestring;
|
|
|
|
|
ESP_LOGI(TAG, "Session ID: %s", session_id_.c_str());
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-16 05:49:35 +08:00
|
|
|
auto audio_params = cJSON_GetObjectItem(root, "audio_params");
|
2025-05-21 15:59:27 +08:00
|
|
|
if (cJSON_IsObject(audio_params)) {
|
2024-11-16 05:49:35 +08:00
|
|
|
auto sample_rate = cJSON_GetObjectItem(audio_params, "sample_rate");
|
2025-05-21 15:59:27 +08:00
|
|
|
if (cJSON_IsNumber(sample_rate)) {
|
2024-11-16 05:49:35 +08:00
|
|
|
server_sample_rate_ = sample_rate->valueint;
|
|
|
|
|
}
|
2025-03-30 09:07:08 +08:00
|
|
|
auto frame_duration = cJSON_GetObjectItem(audio_params, "frame_duration");
|
2025-05-21 15:59:27 +08:00
|
|
|
if (cJSON_IsNumber(frame_duration)) {
|
2025-03-30 09:07:08 +08:00
|
|
|
server_frame_duration_ = frame_duration->valueint;
|
|
|
|
|
}
|
2024-11-16 05:49:35 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
xEventGroupSetBits(event_group_handle_, WEBSOCKET_PROTOCOL_SERVER_HELLO_EVENT);
|
|
|
|
|
}
|