forked from xiaozhi/xiaozhi-esp32
1.3.1 Updates
- Add startup and network failure sound effects
- 12864 OLED scroll text
- Internalization of volume actions
This commit is contained in:
@@ -8,19 +8,27 @@ import numpy as np
|
||||
|
||||
def encode_audio_to_opus(input_file, output_file):
|
||||
# Load audio file using librosa
|
||||
audio, sample_rate = librosa.load(input_file, sr=None, mono=False, dtype=np.int16)
|
||||
audio, sample_rate = librosa.load(input_file, sr=None, mono=False, dtype=np.float32)
|
||||
|
||||
# Convert sample rate to 16000Hz if necessary
|
||||
target_sample_rate = 16000
|
||||
if sample_rate != target_sample_rate:
|
||||
audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=target_sample_rate)
|
||||
sample_rate = target_sample_rate
|
||||
|
||||
# Get left channel if stereo
|
||||
if audio.ndim == 2:
|
||||
audio = audio[0]
|
||||
|
||||
# Convert audio data back to int16 after resampling
|
||||
audio = (audio * 32767).astype(np.int16)
|
||||
|
||||
# Initialize Opus encoder
|
||||
encoder = opuslib.Encoder(sample_rate, 1, opuslib.APPLICATION_VOIP)
|
||||
|
||||
# Encode audio data to Opus packets
|
||||
# Save encoded data to file
|
||||
with open(output_file, 'wb') as f:
|
||||
sample_rate = 16000 # 16000Hz
|
||||
duration = 60 # 60ms every frame
|
||||
frame_size = int(sample_rate * duration / 1000)
|
||||
for i in tqdm.tqdm(range(0, len(audio) - frame_size, frame_size)):
|
||||
|
||||
Reference in New Issue
Block a user