diff --git a/main/CMakeLists.txt b/main/CMakeLists.txt
index 34bdb8d7..0318e4ea 100644
--- a/main/CMakeLists.txt
+++ b/main/CMakeLists.txt
@@ -4,6 +4,7 @@ set(SOURCES "audio_codecs/audio_codec.cc"
             "audio_codecs/es8311_audio_codec.cc"
             "audio_codecs/es8374_audio_codec.cc"
             "audio_codecs/es8388_audio_codec.cc"
+            "audio_processing/audio_debugger.cc"
             "led/single_led.cc"
             "led/circular_strip.cc"
             "led/gpio_led.cc"
diff --git a/main/Kconfig.projbuild b/main/Kconfig.projbuild
index 739f5b22..f85c9e1f 100644
--- a/main/Kconfig.projbuild
+++ b/main/Kconfig.projbuild
@@ -392,6 +392,19 @@ config USE_SERVER_AEC
     help
         启用服务器端 AEC,需要服务器支持
 
+config USE_AUDIO_DEBUGGER
+    bool "Enable Audio Debugger"
+    default n
+    help
+        启用音频调试功能,通过UDP发送音频数据
+
+config AUDIO_DEBUG_UDP_SERVER
+    string "Audio Debug UDP Server Address"
+    default "192.168.2.100:8000"
+    depends on USE_AUDIO_DEBUGGER
+    help
+        UDP服务器地址,格式: IP:PORT,用于接收音频调试数据
+
 choice IOT_PROTOCOL
     prompt "IoT Protocol"
     default IOT_PROTOCOL_MCP
diff --git a/main/application.cc b/main/application.cc
index 53aa8c3e..42c05469 100644
--- a/main/application.cc
+++ b/main/application.cc
@@ -10,6 +10,7 @@
 #include "iot/thing_manager.h"
 #include "assets/lang_config.h"
 #include "mcp_server.h"
+#include "audio_debugger.h"
 
 #if CONFIG_USE_AUDIO_PROCESSOR
 #include "afe_audio_processor.h"
@@ -569,6 +570,7 @@ void Application::Start() {
     });
     bool protocol_started = protocol_->Start();
 
+    audio_debugger_ = std::make_unique<AudioDebugger>();
     audio_processor_->Initialize(codec);
     audio_processor_->OnOutput([this](std::vector<int16_t>&& data) {
         {
@@ -884,6 +886,12 @@ bool Application::ReadAudio(std::vector<int16_t>& data, int sample_rate, int sam
             return false;
         }
     }
+
+    // Audio debugging: forward the captured audio buffer to the debug server
+    if (audio_debugger_) {
+        audio_debugger_->Feed(data);
+    }
+
     return true;
 }
 
diff --git a/main/application.h b/main/application.h
index fd138447..3a9b6fd1 100644
--- a/main/application.h
+++ b/main/application.h
@@ -22,6 +22,7 @@
 #include "background_task.h"
 #include "audio_processor.h"
 #include "wake_word.h"
+#include "audio_debugger.h"
 
 #define SCHEDULE_EVENT (1 << 0)
 #define SEND_AUDIO_EVENT (1 << 1)
@@ -86,6 +87,7 @@ private:
 
     std::unique_ptr<WakeWord> wake_word_;
     std::unique_ptr<AudioProcessor> audio_processor_;
+    std::unique_ptr<AudioDebugger> audio_debugger_;
     Ota ota_;
     std::mutex mutex_;
     std::list<std::function<void()>> main_tasks_;
diff --git a/main/audio_codecs/audio_codec.h b/main/audio_codecs/audio_codec.h
index beb71a6d..fb561061 100644
--- a/main/audio_codecs/audio_codec.h
+++ b/main/audio_codecs/audio_codec.h
@@ -13,7 +13,7 @@
 
 #define AUDIO_CODEC_DMA_DESC_NUM 6
 #define AUDIO_CODEC_DMA_FRAME_NUM 240
-#define AUDIO_CODEC_DEFAULT_MIC_GAIN 36.0
+#define AUDIO_CODEC_DEFAULT_MIC_GAIN 30.0
 
 class AudioCodec {
 public:
diff --git a/main/audio_codecs/box_audio_codec.cc b/main/audio_codecs/box_audio_codec.cc
index 58e0270d..acf8f2a8 100644
--- a/main/audio_codecs/box_audio_codec.cc
+++ b/main/audio_codecs/box_audio_codec.cc
@@ -199,7 +199,7 @@ void BoxAudioCodec::EnableInput(bool enable) {
             fs.channel_mask |= ESP_CODEC_DEV_MAKE_CHANNEL_MASK(1);
         }
         ESP_ERROR_CHECK(esp_codec_dev_open(input_dev_, &fs));
-        ESP_ERROR_CHECK(esp_codec_dev_set_in_channel_gain(input_dev_, ESP_CODEC_DEV_MAKE_CHANNEL_MASK(0), 36.0));
+        ESP_ERROR_CHECK(esp_codec_dev_set_in_channel_gain(input_dev_, ESP_CODEC_DEV_MAKE_CHANNEL_MASK(0), AUDIO_CODEC_DEFAULT_MIC_GAIN));
     } else {
         ESP_ERROR_CHECK(esp_codec_dev_close(input_dev_));
     }
diff --git a/main/audio_processing/afe_wake_word.cc b/main/audio_processing/afe_wake_word.cc
index af7bfa06..77955970 100644
--- a/main/audio_processing/afe_wake_word.cc
+++ b/main/audio_processing/afe_wake_word.cc
@@ -155,18 +155,19 @@ void AfeWakeWord::EncodeWakeWordData() {
         auto encoder = std::make_unique<OpusEncoderWrapper>(16000, 1, OPUS_FRAME_DURATION_MS);
         encoder->SetComplexity(0); // 0 is the fastest
 
+        int packets = 0;
         for (auto& pcm: this_->wake_word_pcm_) {
             encoder->Encode(std::move(pcm), [this_](std::vector<uint8_t>&& opus) {
                 std::lock_guard<std::mutex> lock(this_->wake_word_mutex_);
                 this_->wake_word_opus_.emplace_back(std::move(opus));
                 this_->wake_word_cv_.notify_all();
             });
+            packets++;
         }
         this_->wake_word_pcm_.clear();
 
         auto end_time = esp_timer_get_time();
-        ESP_LOGI(TAG, "Encode wake word opus %u packets in %lld ms",
-            this_->wake_word_opus_.size(), (end_time - start_time) / 1000);
+        ESP_LOGI(TAG, "Encode wake word opus %d packets in %ld ms", packets, (long)((end_time - start_time) / 1000));
 
         std::lock_guard<std::mutex> lock(this_->wake_word_mutex_);
         this_->wake_word_opus_.push_back(std::vector<uint8_t>());
diff --git a/main/audio_processing/audio_debugger.cc b/main/audio_processing/audio_debugger.cc
new file mode 100644
index 00000000..216b2258
--- /dev/null
+++ b/main/audio_processing/audio_debugger.cc
@@ -0,0 +1,97 @@
+#include "audio_debugger.h"
+#include "sdkconfig.h"
+
+#if CONFIG_USE_AUDIO_DEBUGGER
+#include <esp_log.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <cerrno>
+#include <cstdlib>
+#include <cstring>
+#include <string>
+
+#define TAG "AudioDebugger"
+#endif
+
+// Everything that touches the socket API, the logger or
+// CONFIG_AUDIO_DEBUG_UDP_SERVER lives inside CONFIG_USE_AUDIO_DEBUGGER guards:
+// that Kconfig string only exists when the option is enabled, so unguarded
+// uses would break the default (disabled) build.
+
+// Parses CONFIG_AUDIO_DEBUG_UDP_SERVER ("IP:PORT") and opens the UDP socket.
+// On any failure udp_sockfd_ stays -1, which turns Feed() into a no-op.
+AudioDebugger::AudioDebugger() {
+#if CONFIG_USE_AUDIO_DEBUGGER
+    const std::string server_addr = CONFIG_AUDIO_DEBUG_UDP_SERVER;
+    const size_t colon_pos = server_addr.find(':');
+    if (colon_pos == std::string::npos) {
+        ESP_LOGW(TAG, "Invalid server address: %s, should be IP:PORT", CONFIG_AUDIO_DEBUG_UDP_SERVER);
+        return;
+    }
+
+    const std::string ip = server_addr.substr(0, colon_pos);
+    const std::string port_str = server_addr.substr(colon_pos + 1);
+
+    // strtol rather than std::stoi: stoi reports bad input by throwing, and a
+    // typo in a debug-only Kconfig string should not take the firmware down.
+    char* end = nullptr;
+    const long port = std::strtol(port_str.c_str(), &end, 10);
+    if (end == port_str.c_str() || *end != '\0' || port < 1 || port > 65535) {
+        ESP_LOGW(TAG, "Invalid server port: %s, should be 1-65535", port_str.c_str());
+        return;
+    }
+
+    memset(&udp_server_addr_, 0, sizeof(udp_server_addr_));
+    udp_server_addr_.sin_family = AF_INET;
+    udp_server_addr_.sin_port = htons(static_cast<uint16_t>(port));
+    // inet_pton returns 1 only when the text is a valid IPv4 address.
+    if (inet_pton(AF_INET, ip.c_str(), &udp_server_addr_.sin_addr) != 1) {
+        ESP_LOGW(TAG, "Invalid server IP: %s", ip.c_str());
+        return;
+    }
+
+    // Create the socket last so the early returns above have nothing to clean up.
+    udp_sockfd_ = socket(AF_INET, SOCK_DGRAM, 0);
+    if (udp_sockfd_ < 0) {
+        ESP_LOGW(TAG, "Failed to create UDP socket: %d", errno);
+        return;
+    }
+
+    ESP_LOGI(TAG, "Initialized server address: %s", CONFIG_AUDIO_DEBUG_UDP_SERVER);
+#endif
+}
+
+// Closes the UDP socket if one was opened.
+AudioDebugger::~AudioDebugger() {
+#if CONFIG_USE_AUDIO_DEBUGGER
+    if (udp_sockfd_ >= 0) {
+        close(udp_sockfd_);
+        udp_sockfd_ = -1;
+        ESP_LOGI(TAG, "Closed UDP socket");
+    }
+#endif
+}
+
+// Sends `data` as one UDP datagram of raw int16_t samples to the configured
+// server. Does nothing when the debugger is disabled, the socket is not open,
+// or `data` is empty. Send failures are logged and otherwise ignored.
+void AudioDebugger::Feed(const std::vector<int16_t>& data) {
+#if CONFIG_USE_AUDIO_DEBUGGER
+    if (udp_sockfd_ < 0 || data.empty()) {
+        return;
+    }
+
+    const ssize_t sent = sendto(udp_sockfd_, data.data(), data.size() * sizeof(int16_t), 0,
+        reinterpret_cast<const struct sockaddr*>(&udp_server_addr_), sizeof(udp_server_addr_));
+    if (sent < 0) {
+        ESP_LOGW(TAG, "Failed to send audio data to %s: %d", CONFIG_AUDIO_DEBUG_UDP_SERVER, errno);
+    } else {
+        // ssize_t is not int on every target; cast so the argument matches %d.
+        ESP_LOGD(TAG, "Sent %d bytes audio data to %s", static_cast<int>(sent), CONFIG_AUDIO_DEBUG_UDP_SERVER);
+    }
+#else
+    static_cast<void>(data);
+#endif
+}
diff --git a/main/audio_processing/audio_debugger.h b/main/audio_processing/audio_debugger.h
new file mode 100644
index 00000000..a81336cb
--- /dev/null
+++ b/main/audio_processing/audio_debugger.h
@@ -0,0 +1,30 @@
+#ifndef AUDIO_DEBUGGER_H
+#define AUDIO_DEBUGGER_H
+
+#include <vector>
+#include <cstdint>
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+// Streams captured PCM audio to a UDP server for offline inspection.
+// Active only when CONFIG_USE_AUDIO_DEBUGGER is enabled; otherwise every
+// member is a no-op.
+class AudioDebugger {
+public:
+    AudioDebugger();
+    ~AudioDebugger();
+
+    // The object owns a socket descriptor, so a copy would close it twice.
+    AudioDebugger(const AudioDebugger&) = delete;
+    AudioDebugger& operator=(const AudioDebugger&) = delete;
+
+    // Sends one buffer of 16-bit samples as a single UDP datagram.
+    void Feed(const std::vector<int16_t>& data);
+
+private:
+    int udp_sockfd_ = -1;                      // -1 means no socket is open
+    struct sockaddr_in udp_server_addr_ = {};  // destination of every datagram
+};
+
+#endif
diff --git a/main/protocols/protocol.cc b/main/protocols/protocol.cc
index d5c1cb5e..1d915be8 100644
--- a/main/protocols/protocol.cc
+++ b/main/protocols/protocol.cc
@@ -126,7 +126,7 @@ bool Protocol::IsTimeout() const {
     auto duration = std::chrono::duration_cast<std::chrono::seconds>(now - last_incoming_time_);
     bool timeout = duration.count() > kTimeoutSeconds;
     if (timeout) {
-        ESP_LOGE(TAG, "Channel timeout %lld seconds", duration.count());
+        ESP_LOGE(TAG, "Channel timeout %ld seconds", (long)duration.count());
     }
     return timeout;
 }
diff --git a/scripts/audio_debug_server.py b/scripts/audio_debug_server.py
new file mode 100644
index 00000000..872c4905
--- /dev/null
+++ b/scripts/audio_debug_server.py
@@ -0,0 +1,56 @@
+import argparse
+import socket
+import wave
+
+# Largest payload a UDP datagram can carry; a smaller receive buffer would
+# silently truncate bigger packets.
+MAX_DATAGRAM_SIZE = 65535
+
+
+def main(samplerate, channels, port=8000):
+    """Receive raw PCM over UDP and save it to a WAV file.
+
+    Binds a UDP socket to 0.0.0.0:<port>, appends the payload of every
+    datagram to "<samplerate>_<channels>.wav" as 16-bit samples and prints the
+    size and sender of each datagram. Runs until interrupted with Ctrl+C.
+
+    Args:
+        samplerate: Sample rate in Hz written to the WAV header.
+        channels: Channel count written to the WAV header.
+        port: UDP port to listen on.
+    """
+    filename = f"{samplerate}_{channels}.wav"
+
+    # Bind before creating the WAV file so a failed bind leaves no empty file.
+    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as server_socket:
+        server_socket.bind(('0.0.0.0', port))
+
+        with wave.open(filename, "wb") as wav_file:
+            wav_file.setnchannels(channels)
+            wav_file.setsampwidth(2)  # 2 bytes per sample (16-bit)
+            wav_file.setframerate(samplerate)
+
+            print(f"Start saving audio from 0.0.0.0:{port} to {filename}...")
+
+            try:
+                while True:
+                    message, address = server_socket.recvfrom(MAX_DATAGRAM_SIZE)
+                    wav_file.writeframes(message)
+                    print(f"Received {len(message)} bytes from {address}")
+            except KeyboardInterrupt:
+                print("\nStopping recording...")
+
+    print(f"WAV file '{filename}' saved successfully")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description='UDP音频数据接收器,保存为WAV文件')
+    parser.add_argument('--samplerate', '-s', type=int, default=16000,
+                        help='采样率 (默认: 16000)')
+    parser.add_argument('--channels', '-c', type=int, default=2,
+                        help='声道数 (默认: 2)')
+    parser.add_argument('--port', '-p', type=int, default=8000,
+                        help='UDP监听端口 (默认: 8000)')
+
+    args = parser.parse_args()
+    main(args.samplerate, args.channels, args.port)