From 10016a3ea518ade900693850405fa8d21b3958b0 Mon Sep 17 00:00:00 2001 From: Ky1eYang Date: Sat, 5 Jul 2025 14:45:48 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E5=A3=B0=E6=B3=A2?= =?UTF-8?q?=E9=85=8D=E7=BD=91,=20=E9=9C=80=E8=B0=83=E6=95=B4application?= =?UTF-8?q?=E7=9A=84ReadAudio=E5=85=AC=E6=9C=89,=20=E9=9C=80=E6=B7=BB?= =?UTF-8?q?=E5=8A=A0=E6=9D=A1=E4=BB=B6=E7=BC=96=E8=AF=91,=20=E4=BD=8D?= =?UTF-8?q?=E4=BA=8E'afsk=5Fdemod.h'=E5=86=85=E5=AE=9A=E4=B9=89=E5=8F=82?= =?UTF-8?q?=E6=95=B0=20(#852)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: 添加声波配网, 需调整application的ReadAudio公有, 需添加条件编译, 位于'afsk_demod.h'内定义参数 * mod: afsk的重构,旨在提高代码可读性并遵循Google C++代码风格指南 * mod: 更新依赖esp-wifi-connect需求版号 * feat: 添加声波配网, 需调整application的ReadAudio公有, 需添加条件编译, 位于'afsk_demod.h'内定义参数 * mod: afsk的重构,旨在提高代码可读性并遵循Google C++代码风格指南 * mod: 更新依赖esp-wifi-connect需求版号 * mod: 添加判断只有在WiFi配置模式下才会调用ReadAudio, 否则delay(联网成功重启后该任务不会被启动) * add: 添加USE_ACOUSTIC_WIFI_PROVISIONING进MENU开关声波配网功能 --------- Co-authored-by: yangkaiyue --- main/Kconfig.projbuild | 6 + main/application.h | 2 +- main/boards/common/afsk_demod.cc | 411 +++++++++++++++++++++++++++++++ main/boards/common/afsk_demod.h | 176 +++++++++++++ main/boards/common/wifi_board.cc | 5 + main/idf_component.yml | 2 +- 6 files changed, 600 insertions(+), 2 deletions(-) create mode 100644 main/boards/common/afsk_demod.cc create mode 100644 main/boards/common/afsk_demod.h diff --git a/main/Kconfig.projbuild b/main/Kconfig.projbuild index fb5acf71..72b894ce 100644 --- a/main/Kconfig.projbuild +++ b/main/Kconfig.projbuild @@ -409,6 +409,12 @@ config USE_AUDIO_DEBUGGER help 启用音频调试功能,通过UDP发送音频数据 +config USE_ACOUSTIC_WIFI_PROVISIONING + bool "Enable Acoustic WiFi Provisioning" + default n + help + 启用声波配网功能,使用音频信号传输 WiFi 配置数据 + config AUDIO_DEBUG_UDP_SERVER string "Audio Debug UDP Server Address" default "192.168.2.100:8000" diff --git a/main/application.h b/main/application.h index b73075c2..ff23fc61 100644 --- a/main/application.h +++ b/main/application.h @@ -80,6 +80,7 @@ public: bool CanEnterSleepMode(); void SendMcpMessage(const std::string& payload); void SetAecMode(AecMode mode); + bool ReadAudio(std::vector& data, int sample_rate, int samples); AecMode GetAecMode() const { return aec_mode_; } BackgroundTask* GetBackgroundTask() const { return background_task_; } @@ -129,7 +130,6 @@ private: void MainEventLoop(); void OnAudioInput(); void OnAudioOutput(); - bool ReadAudio(std::vector& data, int sample_rate, int samples); void ResetDecoder(); void SetDecodeSampleRate(int sample_rate, int frame_duration); void CheckNewVersion(Ota& ota); diff --git a/main/boards/common/afsk_demod.cc b/main/boards/common/afsk_demod.cc new file mode 100644 index 00000000..ac52919e --- /dev/null +++ b/main/boards/common/afsk_demod.cc @@ -0,0 +1,411 @@ +#include "afsk_demod.h" +#include +#include +#include "esp_log.h" + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +namespace audio_wifi_config +{ + static const char *kLogTag = "AUDIO_WIFI_CONFIG"; + + void ReceiveWifiCredentialsFromAudio(Application *app, + WifiConfigurationAp *wifi_ap) + { + const int kInputSampleRate = 16000; // Input sampling rate + const float kDownsampleStep = static_cast(kInputSampleRate) / static_cast(kAudioSampleRate); // Downsampling step + std::vector audio_data; + AudioSignalProcessor signal_processor(kAudioSampleRate, kMarkFrequency, kSpaceFrequency, kBitRate, kWindowSize); + AudioDataBuffer data_buffer; + + while (true) + { + // 检查Application状态,只有在WiFi配置模式下才处理音频 + if (app->GetDeviceState() != kDeviceStateWifiConfiguring) { + // 不在WiFi配置状态,休眠100ms后再检查 + vTaskDelay(pdMS_TO_TICKS(100)); + continue; + } + + if (!app->ReadAudio(audio_data, 16000, 480)) { // 16kHz, 480 samples corresponds to 30ms data + // 读取音频失败,短暂延迟后重试 + ESP_LOGI(kLogTag, "Failed to read audio data, retrying."); + vTaskDelay(pdMS_TO_TICKS(10)); + continue; + } + + // Downsample the audio data + std::vector downsampled_data; + size_t last_index = 0; + + if (kDownsampleStep > 1.0f) + { + downsampled_data.reserve(audio_data.size() / static_cast(kDownsampleStep)); + for (size_t i = 0; i < audio_data.size(); ++i) + { + size_t sample_index = static_cast(i / kDownsampleStep); + if ((sample_index + 1) > last_index) + { + downsampled_data.push_back(static_cast(audio_data[i])); + last_index = sample_index + 1; + } + } + } + else + { + downsampled_data.reserve(audio_data.size()); + for (int16_t sample : audio_data) + { + downsampled_data.push_back(static_cast(sample)); + } + } + + // Process audio samples to get probability data + auto probabilities = signal_processor.ProcessAudioSamples(downsampled_data); + + // Feed probability data to the data buffer + if (data_buffer.ProcessProbabilityData(probabilities, 0.5f)) + { + // If complete data was received, extract WiFi credentials + if (data_buffer.decoded_text.has_value()) + { + ESP_LOGI(kLogTag, "Received text data: %s", data_buffer.decoded_text->c_str()); + + // Split SSID and password by newline character + std::string wifi_ssid, wifi_password; + size_t newline_position = data_buffer.decoded_text->find('\n'); + if (newline_position != std::string::npos) + { + wifi_ssid = data_buffer.decoded_text->substr(0, newline_position); + wifi_password = data_buffer.decoded_text->substr(newline_position + 1); + ESP_LOGI(kLogTag, "WiFi SSID: %s, Password: %s", wifi_ssid.c_str(), wifi_password.c_str()); + } + else + { + ESP_LOGE(kLogTag, "Invalid data format, no newline character found"); + continue; + } + + if (wifi_ap->ConnectToWifi(wifi_ssid, wifi_password)) + { + wifi_ap->Save(wifi_ssid, wifi_password); // Save WiFi credentials + esp_restart(); // Restart device to apply new WiFi configuration + } + else + { + ESP_LOGE(kLogTag, "Failed to connect to WiFi with received credentials"); + } + data_buffer.decoded_text.reset(); // Clear processed data + } + } + vTaskDelay(pdMS_TO_TICKS(1)); // 1ms delay + } + } + + // Default start and end transmission identifiers + // \x01\x02 = 00000001 00000010 + const std::vector kDefaultStartTransmissionPattern = { + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0}; + + // \x03\x04 = 00000011 00000100 + const std::vector kDefaultEndTransmissionPattern = { + 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0}; + + // FrequencyDetector implementation + FrequencyDetector::FrequencyDetector(float frequency, size_t window_size) + : frequency_(frequency), window_size_(window_size) + { + frequency_bin_ = std::floor(frequency_ * static_cast(window_size_)); + angular_frequency_ = 2.0f * M_PI * frequency_; + cos_coefficient_ = std::cos(angular_frequency_); + sin_coefficient_ = std::sin(angular_frequency_); + filter_coefficient_ = 2.0f * cos_coefficient_; + + // Initialize state buffer + state_buffer_.push_back(0.0f); + state_buffer_.push_back(0.0f); + } + + void FrequencyDetector::Reset() + { + state_buffer_.clear(); + state_buffer_.push_back(0.0f); + state_buffer_.push_back(0.0f); + } + + void FrequencyDetector::ProcessSample(float sample) + { + if (state_buffer_.size() < 2) + { + return; + } + + float s_minus_2 = state_buffer_.front(); // S[-2] + state_buffer_.pop_front(); + float s_minus_1 = state_buffer_.front(); // S[-1] + state_buffer_.pop_front(); + + float s_current = sample + filter_coefficient_ * s_minus_1 - s_minus_2; + + state_buffer_.push_back(s_minus_1); // Put S[-1] back + state_buffer_.push_back(s_current); // Add new S[0] + } + + float FrequencyDetector::GetAmplitude() const + { + if (state_buffer_.size() < 2) + { + return 0.0f; + } + + float s_minus_1 = state_buffer_[1]; // S[-1] + float s_minus_2 = state_buffer_[0]; // S[-2] + float real_part = cos_coefficient_ * s_minus_1 - s_minus_2; // Real part + float imaginary_part = sin_coefficient_ * s_minus_1; // Imaginary part + + return std::sqrt(real_part * real_part + imaginary_part * imaginary_part) / + (static_cast(window_size_) / 2.0f); + } + + // AudioSignalProcessor implementation + AudioSignalProcessor::AudioSignalProcessor(size_t sample_rate, size_t mark_frequency, size_t space_frequency, + size_t bit_rate, size_t window_size) + : input_buffer_size_(window_size), output_sample_count_(0) + { + if (sample_rate % bit_rate != 0) + { + // On ESP32 we can continue execution, but log the error + ESP_LOGW(kLogTag, "Sample rate %zu is not divisible by bit rate %zu", sample_rate, bit_rate); + } + + float normalized_mark_freq = static_cast(mark_frequency) / static_cast(sample_rate); + float normalized_space_freq = static_cast(space_frequency) / static_cast(sample_rate); + + mark_detector_ = std::make_unique(normalized_mark_freq, window_size); + space_detector_ = std::make_unique(normalized_space_freq, window_size); + + samples_per_bit_ = sample_rate / bit_rate; // Number of samples per bit + } + + std::vector AudioSignalProcessor::ProcessAudioSamples(const std::vector &samples) + { + std::vector result; + + for (float sample : samples) + { + if (input_buffer_.size() < input_buffer_size_) + { + input_buffer_.push_back(sample); // Just add, don't process yet + } + else + { + // Input buffer is full, process the data + input_buffer_.pop_front(); // Remove oldest sample + input_buffer_.push_back(sample); // Add new sample + output_sample_count_++; + + if (output_sample_count_ >= samples_per_bit_) + { + // Process all samples in the window using Goertzel algorithm + for (float window_sample : input_buffer_) + { + mark_detector_->ProcessSample(window_sample); + space_detector_->ProcessSample(window_sample); + } + + float mark_amplitude = mark_detector_->GetAmplitude(); // Mark amplitude + float space_amplitude = space_detector_->GetAmplitude(); // Space amplitude + + // Avoid division by zero + float mark_probability = mark_amplitude / + (space_amplitude + mark_amplitude + std::numeric_limits::epsilon()); + result.push_back(mark_probability); + + // Reset detector windows + mark_detector_->Reset(); + space_detector_->Reset(); + output_sample_count_ = 0; // Reset output counter + } + } + } + + return result; + } + + // AudioDataBuffer implementation + AudioDataBuffer::AudioDataBuffer() + : current_state_(DataReceptionState::kInactive), + start_of_transmission_(kDefaultStartTransmissionPattern), + end_of_transmission_(kDefaultEndTransmissionPattern), + enable_checksum_validation_(true) + { + identifier_buffer_size_ = std::max(start_of_transmission_.size(), end_of_transmission_.size()); + max_bit_buffer_size_ = 776; // Preset bit buffer size, 776 bits = (32 + 1 + 63 + 1) * 8 = 776 + + bit_buffer_.reserve(max_bit_buffer_size_); + } + + AudioDataBuffer::AudioDataBuffer(size_t max_byte_size, const std::vector &start_identifier, + const std::vector &end_identifier, bool enable_checksum) + : current_state_(DataReceptionState::kInactive), + start_of_transmission_(start_identifier), + end_of_transmission_(end_identifier), + enable_checksum_validation_(enable_checksum) + { + identifier_buffer_size_ = std::max(start_of_transmission_.size(), end_of_transmission_.size()); + max_bit_buffer_size_ = max_byte_size * 8; // Bit buffer size in bytes + + bit_buffer_.reserve(max_bit_buffer_size_); + } + + uint8_t AudioDataBuffer::CalculateChecksum(const std::string &text) + { + uint8_t checksum = 0; + for (char character : text) + { + checksum += static_cast(character); + } + return checksum; + } + + void AudioDataBuffer::ClearBuffers() + { + identifier_buffer_.clear(); + bit_buffer_.clear(); + } + + bool AudioDataBuffer::ProcessProbabilityData(const std::vector &probabilities, float threshold) + { + for (float probability : probabilities) + { + uint8_t bit = (probability > threshold) ? 1 : 0; + + if (identifier_buffer_.size() >= identifier_buffer_size_) + { + identifier_buffer_.pop_front(); // Maintain buffer size + } + identifier_buffer_.push_back(bit); + + // Process received bit based on state machine + switch (current_state_) + { + case DataReceptionState::kInactive: + if (identifier_buffer_.size() >= start_of_transmission_.size()) + { + current_state_ = DataReceptionState::kWaiting; // Enter waiting state + ESP_LOGI(kLogTag, "Entering Waiting state"); + } + break; + + case DataReceptionState::kWaiting: + // Waiting state, possibly waiting for transmission end + if (identifier_buffer_.size() >= start_of_transmission_.size()) + { + std::vector identifier_snapshot(identifier_buffer_.begin(), identifier_buffer_.end()); + if (identifier_snapshot == start_of_transmission_) + { + ClearBuffers(); // Clear buffers + current_state_ = DataReceptionState::kReceiving; // Enter receiving state + ESP_LOGI(kLogTag, "Entering Receiving state"); + } + } + break; + + case DataReceptionState::kReceiving: + bit_buffer_.push_back(bit); + if (identifier_buffer_.size() >= end_of_transmission_.size()) + { + std::vector identifier_snapshot(identifier_buffer_.begin(), identifier_buffer_.end()); + if (identifier_snapshot == end_of_transmission_) + { + current_state_ = DataReceptionState::kInactive; // Enter inactive state + + // Convert bits to bytes + std::vector bytes = ConvertBitsToBytes(bit_buffer_); + + uint8_t received_checksum = 0; + size_t minimum_length = 0; + + if (enable_checksum_validation_) + { + // If checksum is required, last byte is checksum + minimum_length = 1 + start_of_transmission_.size() / 8; + if (bytes.size() >= minimum_length) + { + received_checksum = bytes[bytes.size() - start_of_transmission_.size() / 8 - 1]; + } + } + else + { + minimum_length = start_of_transmission_.size() / 8; + } + + if (bytes.size() < minimum_length) + { + ClearBuffers(); + ESP_LOGW(kLogTag, "Data too short, clearing buffer"); + return false; // Data too short, return failure + } + + // Extract text data (remove trailing identifier part) + std::vector text_bytes( + bytes.begin(), bytes.begin() + bytes.size() - minimum_length); + + std::string result(text_bytes.begin(), text_bytes.end()); + + // Validate checksum if required + if (enable_checksum_validation_) + { + uint8_t calculated_checksum = CalculateChecksum(result); + if (calculated_checksum != received_checksum) + { + // Checksum mismatch + ESP_LOGW(kLogTag, "Checksum mismatch: expected %d, got %d", + received_checksum, calculated_checksum); + ClearBuffers(); + return false; + } + } + + ClearBuffers(); + decoded_text = result; + return true; // Return success + } + else if (bit_buffer_.size() >= max_bit_buffer_size_) + { + // If not end identifier and bit buffer is full, reset + ClearBuffers(); + ESP_LOGW(kLogTag, "Buffer overflow, clearing buffer"); + current_state_ = DataReceptionState::kInactive; // Reset state machine + } + } + break; + } + } + + return false; + } + + std::vector AudioDataBuffer::ConvertBitsToBytes(const std::vector &bits) const + { + std::vector bytes; + + // Ensure number of bits is a multiple of 8 + size_t complete_bytes_count = bits.size() / 8; + bytes.reserve(complete_bytes_count); + + for (size_t i = 0; i < complete_bytes_count; ++i) + { + uint8_t byte_value = 0; + for (size_t j = 0; j < 8; ++j) + { + byte_value |= bits[i * 8 + j] << (7 - j); + } + bytes.push_back(byte_value); + } + + return bytes; + } +} \ No newline at end of file diff --git a/main/boards/common/afsk_demod.h b/main/boards/common/afsk_demod.h new file mode 100644 index 00000000..13b5bb9e --- /dev/null +++ b/main/boards/common/afsk_demod.h @@ -0,0 +1,176 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include "wifi_configuration_ap.h" +#include "application.h" + +// Audio signal processing constants for WiFi configuration via audio +const size_t kAudioSampleRate = 6400; +const size_t kMarkFrequency = 1800; +const size_t kSpaceFrequency = 1500; +const size_t kBitRate = 100; +const size_t kWindowSize = 64; + +namespace audio_wifi_config +{ + // Main function to receive WiFi credentials through audio signal + void ReceiveWifiCredentialsFromAudio(Application *app, WifiConfigurationAp *wifi_ap); + + /** + * Goertzel algorithm implementation for single frequency detection + * Used to detect specific audio frequencies in the AFSK demodulation process + */ + class FrequencyDetector + { + private: + float frequency_; // Target frequency (normalized, i.e., f / fs) + size_t window_size_; // Window size for analysis + float frequency_bin_; // Frequency bin + float angular_frequency_; // Angular frequency + float cos_coefficient_; // cos(w) + float sin_coefficient_; // sin(w) + float filter_coefficient_; // 2 * cos(w) + std::deque state_buffer_; // Circular buffer for storing S[-1] and S[-2] + + public: + /** + * Constructor + * @param frequency Normalized frequency (f / fs) + * @param window_size Window size for analysis + */ + FrequencyDetector(float frequency, size_t window_size); + + /** + * Reset the detector state + */ + void Reset(); + + /** + * Process one audio sample + * @param sample Input audio sample + */ + void ProcessSample(float sample); + + /** + * Calculate current amplitude + * @return Amplitude value + */ + float GetAmplitude() const; + }; + + /** + * Audio signal processor for Mark/Space frequency pair detection + * Processes audio signals to extract digital data using AFSK demodulation + */ + class AudioSignalProcessor + { + private: + std::deque input_buffer_; // Input sample buffer + size_t input_buffer_size_; // Input buffer size = window size + size_t output_sample_count_; // Output sample counter + size_t samples_per_bit_; // Samples per bit threshold + std::unique_ptr mark_detector_; // Mark frequency detector + std::unique_ptr space_detector_; // Space frequency detector + + public: + /** + * Constructor + * @param sample_rate Audio sampling rate + * @param mark_frequency Mark frequency for digital '1' + * @param space_frequency Space frequency for digital '0' + * @param bit_rate Data transmission bit rate + * @param window_size Analysis window size + */ + AudioSignalProcessor(size_t sample_rate, size_t mark_frequency, size_t space_frequency, + size_t bit_rate, size_t window_size); + + /** + * Process input audio samples + * @param samples Input audio sample vector + * @return Vector of Mark probability values (0.0 to 1.0) + */ + std::vector ProcessAudioSamples(const std::vector &samples); + }; + + /** + * Data reception state machine states + */ + enum class DataReceptionState + { + kInactive, // Waiting for start signal + kWaiting, // Detected potential start, waiting for confirmation + kReceiving // Actively receiving data + }; + + /** + * Data buffer for managing audio-to-digital data conversion + * Handles the complete process from audio signal to decoded text data + */ + class AudioDataBuffer + { + private: + DataReceptionState current_state_; // Current reception state + std::deque identifier_buffer_; // Buffer for start/end identifier detection + size_t identifier_buffer_size_; // Identifier buffer size + std::vector bit_buffer_; // Buffer for storing bit stream + size_t max_bit_buffer_size_; // Maximum bit buffer size + const std::vector start_of_transmission_; // Start-of-transmission identifier + const std::vector end_of_transmission_; // End-of-transmission identifier + bool enable_checksum_validation_; // Whether to validate checksum + + public: + std::optional decoded_text; // Successfully decoded text data + + /** + * Default constructor using predefined start and end identifiers + */ + AudioDataBuffer(); + + /** + * Constructor with custom parameters + * @param max_byte_size Expected maximum data size in bytes + * @param start_identifier Start-of-transmission identifier + * @param end_identifier End-of-transmission identifier + * @param enable_checksum Whether to enable checksum validation + */ + AudioDataBuffer(size_t max_byte_size, const std::vector &start_identifier, + const std::vector &end_identifier, bool enable_checksum = false); + + /** + * Process probability data and attempt to decode + * @param probabilities Vector of Mark probabilities + * @param threshold Decision threshold for bit detection + * @return true if complete data was successfully received and decoded + */ + bool ProcessProbabilityData(const std::vector &probabilities, float threshold = 0.5f); + + /** + * Calculate checksum for ASCII text + * @param text Input text string + * @return Checksum value (0-255) + */ + static uint8_t CalculateChecksum(const std::string &text); + + private: + /** + * Convert bit vector to byte vector + * @param bits Input bit vector + * @return Converted byte vector + */ + std::vector ConvertBitsToBytes(const std::vector &bits) const; + + /** + * Clear all buffers and reset state + */ + void ClearBuffers(); + }; + + // Default start and end transmission identifiers + extern const std::vector kDefaultStartTransmissionPattern; + extern const std::vector kDefaultEndTransmissionPattern; +} \ No newline at end of file diff --git a/main/boards/common/wifi_board.cc b/main/boards/common/wifi_board.cc index 25853bae..5a4a1af2 100644 --- a/main/boards/common/wifi_board.cc +++ b/main/boards/common/wifi_board.cc @@ -20,6 +20,7 @@ #include #include #include +#include "afsk_demod.h" static const char *TAG = "WifiBoard"; @@ -54,6 +55,10 @@ void WifiBoard::EnterWifiConfigMode() { // 播报配置 WiFi 的提示 application.Alert(Lang::Strings::WIFI_CONFIG_MODE, hint.c_str(), "", Lang::Sounds::P3_WIFICONFIG); + + #if USE_ACOUSTIC_WIFI_PROVISIONING + audio_wifi_config::ReceiveWifiCredentialsFromAudio(&application, &wifi_ap); + #endif // Wait forever until reset after configuration while (true) { diff --git a/main/idf_component.yml b/main/idf_component.yml index a780e661..1847ab22 100644 --- a/main/idf_component.yml +++ b/main/idf_component.yml @@ -12,7 +12,7 @@ dependencies: espressif/esp_io_expander_tca9554: ==2.0.0 espressif/esp_lcd_panel_io_additions: ^1.0.1 78/esp_lcd_nv3023: ~1.0.0 - 78/esp-wifi-connect: ~2.4.2 + 78/esp-wifi-connect: ~2.4.3 78/esp-opus-encoder: ~2.3.3 78/esp-ml307: ~2.2.1 78/xiaozhi-fonts: ~1.3.2