Patch summary (free text before the first file header; patch tools skip it):
  * CMakeLists.txt: bump PROJECT_VER from 0.6.1 to 0.6.2.
  * convert_audio_to_p3.py: new script that encodes an audio file into a
    stream of length-prefixed Opus packets (protocol v3).
  * main/BoxAudioDevice.cc: Read()/Write() only call the codec device when
    input_enabled_/output_enabled_ is set; both still return `samples`.
Review note: the convert_audio_to_p3.py hunk was edited after the patch was
generated, so the post-image blob id on its index line is out of date.

diff --git a/CMakeLists.txt b/CMakeLists.txt
index f994f09d..dbf558ce 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@
 # CMakeLists in this exact order for cmake to work correctly
 cmake_minimum_required(VERSION 3.16)
 
-set(PROJECT_VER "0.6.1")
+set(PROJECT_VER "0.6.2")
 
 include($ENV{IDF_PATH}/tools/cmake/project.cmake)
 project(xiaozhi)
diff --git a/convert_audio_to_p3.py b/convert_audio_to_p3.py
new file mode 100644
index 00000000..6942deed
--- /dev/null
+++ b/convert_audio_to_p3.py
@@ -0,0 +1,50 @@
+# convert audio files to protocol v3 stream
+import librosa
+import opuslib
+import struct
+import sys
+import tqdm
+import numpy as np
+
+def encode_audio_to_opus(input_file, output_file):
+    """Encode an audio file into a stream of length-prefixed Opus packets.
+
+    Only the first channel of multi-channel input is encoded. Every packet is
+    written as [1u type, 1u reserved, 2u big-endian length, Opus payload].
+    """
+    # Load audio file using librosa at its native sample rate
+    # NOTE(review): assumes dtype=np.int16 yields 16-bit PCM samples with the
+    # installed librosa version - TODO confirm
+    audio, sample_rate = librosa.load(input_file, sr=None, mono=False, dtype=np.int16)
+
+    # Get left channel if stereo
+    if audio.ndim == 2:
+        audio = audio[0]
+
+    # Initialize Opus encoder
+    encoder = opuslib.Encoder(sample_rate, 1, opuslib.APPLICATION_VOIP)
+
+    # Derive the frame size from the rate the encoder was created with, so a
+    # frame always spans 60 ms whatever the input sample rate is
+    duration = 60 # 60ms every frame
+    frame_size = int(sample_rate * duration / 1000)
+
+    # Encode audio data to Opus packets
+    # Save encoded data to file
+    with open(output_file, 'wb') as f:
+        # "+ 1" keeps the last complete frame; a trailing partial frame is dropped
+        for i in tqdm.tqdm(range(0, len(audio) - frame_size + 1, frame_size)):
+            frame = audio[i:i + frame_size]
+            opus_data = encoder.encode(frame.tobytes(), frame_size=frame_size)
+            # protocol format, [1u type, 1u reserved, 2u len, data]
+            packet = struct.pack('>BBH', 0, 0, len(opus_data)) + opus_data
+            f.write(packet)
+
+# Example usage
+if len(sys.argv) != 3:
+    print('Usage: python convert_audio_to_p3.py input_file output_file')
+    sys.exit(1)
+
+input_file = sys.argv[1]
+output_file = sys.argv[2]
+encode_audio_to_opus(input_file, output_file)
diff --git a/main/BoxAudioDevice.cc b/main/BoxAudioDevice.cc
index a02aa945..6296c604 100644
--- a/main/BoxAudioDevice.cc
+++ b/main/BoxAudioDevice.cc
@@ -219,12 +219,16 @@ void BoxAudioDevice::CreateDuplexChannels() {
 }
 
 int BoxAudioDevice::Read(int16_t *buffer, int samples) {
-    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_codec_dev_read(input_dev_, (void*)buffer, samples * sizeof(int16_t)));
+    if (input_enabled_) {
+        ESP_ERROR_CHECK_WITHOUT_ABORT(esp_codec_dev_read(input_dev_, (void*)buffer, samples * sizeof(int16_t)));
+    }
     return samples;
 }
 
 int BoxAudioDevice::Write(const int16_t *buffer, int samples) {
-    ESP_ERROR_CHECK_WITHOUT_ABORT(esp_codec_dev_write(output_dev_, (void*)buffer, samples * sizeof(int16_t)));
+    if (output_enabled_) {
+        ESP_ERROR_CHECK_WITHOUT_ABORT(esp_codec_dev_write(output_dev_, (void*)buffer, samples * sizeof(int16_t)));
+    }
     return samples;
 }
 