forked from xiaozhi/xiaozhi-esp32
add play_p3 and README
This commit is contained in:
61
scripts/p3_tools/README.md
Normal file
61
scripts/p3_tools/README.md
Normal file
@@ -0,0 +1,61 @@
|
||||
# P3音频格式转换与播放工具
|
||||
|
||||
这个目录包含两个用于处理P3格式音频文件的Python脚本:
|
||||
|
||||
## 1. 音频转换工具 (convert_audio_to_p3.py)
|
||||
|
||||
将普通音频文件转换为P3格式(4字节header + Opus数据包的流式结构)。
|
||||
|
||||
### 使用方法
|
||||
|
||||
```bash
|
||||
python convert_audio_to_p3.py <输入音频文件> <输出P3文件>
|
||||
```
|
||||
|
||||
例如:
|
||||
```bash
|
||||
python convert_audio_to_p3.py input.mp3 output.p3
|
||||
```
|
||||
|
||||
## 2. P3音频播放工具 (play_p3.py)
|
||||
|
||||
播放P3格式的音频文件。
|
||||
|
||||
### 特性
|
||||
|
||||
- 解码并播放P3格式的音频文件
|
||||
- 在播放结束时追加0.5秒静音,避免破音
|
||||
- 支持通过命令行参数指定要播放的文件
|
||||
|
||||
### 使用方法
|
||||
|
||||
```bash
|
||||
python play_p3.py <P3文件路径>
|
||||
```
|
||||
|
||||
例如:
|
||||
```bash
|
||||
python play_p3.py output.p3
|
||||
```
|
||||
|
||||
## 依赖安装
|
||||
|
||||
在使用这些脚本前,请确保安装了所需的Python库:
|
||||
|
||||
```bash
|
||||
pip install librosa opuslib numpy tqdm sounddevice
|
||||
```
|
||||
|
||||
或者使用提供的requirements.txt文件:
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## P3格式说明
|
||||
|
||||
P3格式是一种简单的流式音频格式,结构如下:
|
||||
- 每个音频帧由一个4字节的头部和一个Opus编码的数据包组成
|
||||
- 头部格式:[1字节类型, 1字节保留, 2字节长度(大端序,表示其后Opus数据包的字节数)]
|
||||
- 采样率固定为16000Hz,单声道
|
||||
- 每帧时长为60ms
|
||||
48
scripts/p3_tools/convert_audio_to_p3.py
Normal file
48
scripts/p3_tools/convert_audio_to_p3.py
Normal file
@@ -0,0 +1,48 @@
|
||||
# convert audio files to protocol v3 stream
|
||||
import librosa
|
||||
import opuslib
|
||||
import struct
|
||||
import sys
|
||||
import tqdm
|
||||
import numpy as np
|
||||
|
||||
def encode_audio_to_opus(input_file, output_file):
    """Convert an audio file into a P3 stream of framed Opus packets.

    The input is loaded with librosa, reduced to a single channel (the first
    channel when the file is multi-channel), resampled to 16 kHz, converted
    to 16-bit PCM and encoded as 60 ms Opus frames. Each packet is written as
    a 4-byte big-endian header ``[type: u8, reserved: u8, length: u16]``
    followed by the Opus payload.

    Args:
        input_file: Path of the audio file to read.
        output_file: Path of the P3 file to write (overwritten if present).
    """
    # Load at the file's native sample rate, keeping channels separate.
    audio, sample_rate = librosa.load(
        input_file, sr=None, mono=False, dtype=np.float32
    )

    # Keep only the first (left) channel. Doing this before resampling
    # avoids resampling channels that would be thrown away afterwards.
    if audio.ndim == 2:
        audio = audio[0]

    # Convert sample rate to 16000 Hz if necessary.
    target_sample_rate = 16000
    if sample_rate != target_sample_rate:
        audio = librosa.resample(
            audio, orig_sr=sample_rate, target_sr=target_sample_rate
        )
        sample_rate = target_sample_rate

    # Resampling can overshoot [-1, 1]; clip first so the int16 conversion
    # saturates instead of wrapping around.
    audio = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

    # Initialize the mono Opus encoder.
    encoder = opuslib.Encoder(sample_rate, 1, opuslib.APPLICATION_AUDIO)

    duration = 60  # milliseconds per frame
    frame_size = int(sample_rate * duration / 1000)

    # Pad the tail with silence up to a whole number of frames so the final
    # (partial) frame is encoded instead of being dropped.
    remainder = len(audio) % frame_size
    if remainder:
        padding = np.zeros(frame_size - remainder, dtype=np.int16)
        audio = np.concatenate([audio, padding])

    with open(output_file, 'wb') as f:
        for i in tqdm.tqdm(range(0, len(audio), frame_size)):
            frame = audio[i:i + frame_size]
            opus_data = encoder.encode(frame.tobytes(), frame_size=frame_size)
            # Protocol format: [1u type, 1u reserved, 2u len, data]
            packet = struct.pack('>BBH', 0, 0, len(opus_data)) + opus_data
            f.write(packet)
|
||||
|
||||
def main():
    """Command-line entry point: convert one audio file to a P3 file.

    Expects exactly two command-line arguments, the input audio path and the
    output P3 path. Prints a usage line and exits with status 1 otherwise.
    """
    if len(sys.argv) != 3:
        print('Usage: python convert_audio_to_p3.py <input_file> <output_file>')
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]
    encode_audio_to_opus(input_file, output_file)


# Only run the conversion when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
80
scripts/p3_tools/play_p3.py
Normal file
80
scripts/p3_tools/play_p3.py
Normal file
@@ -0,0 +1,80 @@
|
||||
# 播放p3格式的音频文件
|
||||
import opuslib
|
||||
import struct
|
||||
import numpy as np
|
||||
import sounddevice as sd
|
||||
import time
|
||||
import argparse
|
||||
|
||||
def play_p3_file(input_file):
    """Decode a P3 audio file and play it on the default output device.

    A P3 file is a sequence of packets, each made of a 4-byte big-endian
    header ``[type: u8, reserved: u8, length: u16]`` followed by ``length``
    bytes of Opus data. Audio is decoded as 16 kHz mono in 60 ms frames.
    Reading stops at the first truncated or empty packet.

    Args:
        input_file: Path of the P3 file to play.
    """
    sample_rate = 16000  # fixed by the P3 format
    channels = 1  # mono
    frame_duration_ms = 60
    header_size = 4

    # Initialize the Opus decoder.
    decoder = opuslib.Decoder(sample_rate, channels)

    # Samples per 60 ms frame.
    frame_size = int(sample_rate * frame_duration_ms / 1000)

    # Open the audio output stream.
    stream = sd.OutputStream(
        samplerate=sample_rate,
        channels=channels,
        dtype='int16'
    )
    stream.start()

    try:
        with open(input_file, 'rb') as f:
            print(f"正在播放: {input_file}")

            while True:
                # Read the 4-byte packet header; stop on EOF or truncation.
                header = f.read(header_size)
                if len(header) < header_size:
                    break

                # Only the payload length is needed for playback.
                _packet_type, _reserved, data_len = struct.unpack('>BBH', header)

                # Read the Opus payload; stop on an empty or truncated packet.
                opus_data = f.read(data_len)
                if not opus_data or len(opus_data) < data_len:
                    break

                # Decode the Opus packet into raw 16-bit PCM bytes.
                pcm_data = decoder.decode(opus_data, frame_size)
                audio_array = np.frombuffer(pcm_data, dtype=np.int16)

                # stream.write() blocks until the samples have been handed to
                # the output buffer, which already paces the loop. Sleeping a
                # further frame here would starve the buffer and cause gaps.
                stream.write(audio_array)

            # Append 0.5 s of silence after playback to avoid a pop at the end.
            silence = np.zeros(int(sample_rate / 2), dtype=np.int16)
            stream.write(silence)
            time.sleep(0.5)  # give the 0.5 s of silence time to play out

    except KeyboardInterrupt:
        print("\n播放已停止")
    finally:
        stream.stop()
        stream.close()
        print("播放完成")
|
||||
|
||||
def main():
    """Parse the command line and play the requested P3 file."""
    cli = argparse.ArgumentParser(description='播放p3格式的音频文件')
    # Single required positional argument: the P3 file to play.
    cli.add_argument('input_file', help='输入的p3文件路径')
    options = cli.parse_args()
    play_p3_file(options.input_file)


if __name__ == "__main__":
    main()
|
||||
5
scripts/p3_tools/requirements.txt
Normal file
5
scripts/p3_tools/requirements.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
librosa>=0.9.2
|
||||
opuslib>=3.0.1
|
||||
numpy>=1.20.0
|
||||
tqdm>=4.62.0
|
||||
sounddevice>=0.4.4
|
||||
Reference in New Issue
Block a user