#include "websocket_protocol.h" #include "board.h" #include "system_info.h" #include "application.h" #include #include #include #include #include "assets/lang_config.h" #define TAG "WS" WebsocketProtocol::WebsocketProtocol() { event_group_handle_ = xEventGroupCreate(); } WebsocketProtocol::~WebsocketProtocol() { if (websocket_ != nullptr) { delete websocket_; } vEventGroupDelete(event_group_handle_); } void WebsocketProtocol::Start() { } void WebsocketProtocol::SendAudio(const std::vector& data) { if (websocket_ == nullptr) { return; } busy_sending_audio_ = true; websocket_->Send(data.data(), data.size(), true); busy_sending_audio_ = false; } bool WebsocketProtocol::SendText(const std::string& text) { if (websocket_ == nullptr) { return false; } if (!websocket_->Send(text)) { ESP_LOGE(TAG, "Failed to send text: %s", text.c_str()); SetError(Lang::Strings::SERVER_ERROR); return false; } return true; } bool WebsocketProtocol::IsAudioChannelOpened() const { return websocket_ != nullptr && websocket_->IsConnected() && !error_occurred_ && !IsTimeout(); } void WebsocketProtocol::CloseAudioChannel() { if (websocket_ != nullptr) { delete websocket_; websocket_ = nullptr; } } bool WebsocketProtocol::OpenAudioChannel() { if (websocket_ != nullptr) { delete websocket_; } busy_sending_audio_ = false; error_occurred_ = false; std::string url = CONFIG_WEBSOCKET_URL; std::string token = "Bearer " + std::string(CONFIG_WEBSOCKET_ACCESS_TOKEN); websocket_ = Board::GetInstance().CreateWebSocket(); websocket_->SetHeader("Authorization", token.c_str()); websocket_->SetHeader("Protocol-Version", "1"); websocket_->SetHeader("Device-Id", SystemInfo::GetMacAddress().c_str()); websocket_->SetHeader("Client-Id", Board::GetInstance().GetUuid().c_str()); websocket_->OnData([this](const char* data, size_t len, bool binary) { if (binary) { if (on_incoming_audio_ != nullptr) { on_incoming_audio_(std::vector((uint8_t*)data, (uint8_t*)data + len)); } } else { // Parse JSON data auto root = cJSON_Parse(data); auto type = cJSON_GetObjectItem(root, "type"); if (type != NULL) { if (strcmp(type->valuestring, "hello") == 0) { ParseServerHello(root); } else { if (on_incoming_json_ != nullptr) { on_incoming_json_(root); } } } else { ESP_LOGE(TAG, "Missing message type, data: %s", data); } cJSON_Delete(root); } last_incoming_time_ = std::chrono::steady_clock::now(); }); websocket_->OnDisconnected([this]() { ESP_LOGI(TAG, "Websocket disconnected"); if (on_audio_channel_closed_ != nullptr) { on_audio_channel_closed_(); } }); if (!websocket_->Connect(url.c_str())) { ESP_LOGE(TAG, "Failed to connect to websocket server"); SetError(Lang::Strings::SERVER_NOT_FOUND); return false; } // Send hello message to describe the client // keys: message type, version, audio_params (format, sample_rate, channels) std::string message = "{"; message += "\"type\":\"hello\","; message += "\"version\": 1,"; message += "\"transport\":\"websocket\","; message += "\"audio_params\":{"; message += "\"format\":\"opus\", \"sample_rate\":16000, \"channels\":1, \"frame_duration\":" + std::to_string(OPUS_FRAME_DURATION_MS); message += "}}"; if (!SendText(message)) { return false; } // Wait for server hello EventBits_t bits = xEventGroupWaitBits(event_group_handle_, WEBSOCKET_PROTOCOL_SERVER_HELLO_EVENT, pdTRUE, pdFALSE, pdMS_TO_TICKS(10000)); if (!(bits & WEBSOCKET_PROTOCOL_SERVER_HELLO_EVENT)) { ESP_LOGE(TAG, "Failed to receive server hello"); SetError(Lang::Strings::SERVER_TIMEOUT); return false; } if (on_audio_channel_opened_ != nullptr) { on_audio_channel_opened_(); } return true; } void WebsocketProtocol::ParseServerHello(const cJSON* root) { auto transport = cJSON_GetObjectItem(root, "transport"); if (transport == nullptr || strcmp(transport->valuestring, "websocket") != 0) { ESP_LOGE(TAG, "Unsupported transport: %s", transport->valuestring); return; } auto audio_params = cJSON_GetObjectItem(root, "audio_params"); if (audio_params != NULL) { auto sample_rate = cJSON_GetObjectItem(audio_params, "sample_rate"); if (sample_rate != NULL) { server_sample_rate_ = sample_rate->valueint; } auto frame_duration = cJSON_GetObjectItem(audio_params, "frame_duration"); if (frame_duration != NULL) { server_frame_duration_ = frame_duration->valueint; } } xEventGroupSetBits(event_group_handle_, WEBSOCKET_PROTOCOL_SERVER_HELLO_EVENT); }