add play_p3 and README

2025-03-12 23:28:20 +08:00
parent 45fa2ca389
commit 5a2e9ac87f
4 changed files with 146 additions and 0 deletions
--- a/scripts/p3_tools/README.md
+++ b/scripts/p3_tools/README.md
@@ -0,0 +1,61 @@
+# P3音频格式转换与播放工具
+
+这个目录包含两个用于处理P3格式音频文件的Python脚本：
+
+## 1. 音频转换工具 (convert_audio_to_p3.py)
+
+将普通音频文件转换为P3格式（4字节header + Opus数据包的流式结构）。
+
+### 使用方法
+
+```bash
+python convert_audio_to_p3.py <输入音频文件> <输出P3文件>
+```
+
+例如：
+```bash
+python convert_audio_to_p3.py input.mp3 output.p3
+```
+
+## 2. P3音频播放工具 (play_p3.py)
+
+播放P3格式的音频文件。
+
+### 特性
+
+- 解码并播放P3格式的音频文件
+- 播放结束时追加0.5秒静音，避免破音；播放过程中可按 Ctrl+C 中断
+- 支持通过命令行参数指定要播放的文件
+
+### 使用方法
+
+```bash
+python play_p3.py <P3文件路径>
+```
+
+例如：
+```bash
+python play_p3.py output.p3
+```
+
+## 依赖安装
+
+在使用这些脚本前，请确保安装了所需的Python库：
+
+```bash
+pip install librosa opuslib numpy tqdm sounddevice
+```
+
+或者使用提供的requirements.txt文件：
+
+```bash
+pip install -r requirements.txt
+```
+
+## P3格式说明
+
+P3格式是一种简单的流式音频格式，结构如下：
+- 每个音频帧由一个4字节的头部和一个Opus编码的数据包组成
+- 头部格式：[1字节类型, 1字节保留, 2字节长度]，其中长度为大端序（big-endian）无符号整数，表示紧随其后的Opus数据包的字节数
+- 采样率固定为16000Hz，单声道
+- 每帧时长为60ms 
--- a/scripts/p3_tools/convert_audio_to_p3.py
+++ b/scripts/p3_tools/convert_audio_to_p3.py
@@ -0,0 +1,48 @@
+# convert audio files to protocol v3 stream
+import librosa
+import opuslib
+import struct
+import sys
+import tqdm
+import numpy as np
+
+def encode_audio_to_opus(input_file, output_file):
+    """Convert an audio file into a P3 stream of framed Opus packets.
+
+    The input is loaded with librosa, reduced to its first (left) channel,
+    resampled to 16 kHz, converted to 16-bit PCM and encoded as 60 ms Opus
+    frames. Every packet is written as a 4-byte big-endian header
+    ``[type: u8, reserved: u8, length: u16]`` followed by the Opus payload;
+    type and reserved are always written as 0.
+
+    Args:
+        input_file: Path of the source audio file.
+        output_file: Path of the P3 file to write (opened in ``'wb'`` mode).
+    """
+    # Load at the native sample rate and keep channels separate.
+    audio, sample_rate = librosa.load(input_file, sr=None, mono=False, dtype=np.float32)
+
+    # Keep only the left channel. Doing this before resampling avoids
+    # resampling a channel that is thrown away afterwards.
+    if audio.ndim == 2:
+        audio = audio[0]
+
+    # Convert sample rate to 16000Hz if necessary
+    target_sample_rate = 16000
+    if sample_rate != target_sample_rate:
+        audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=target_sample_rate)
+        sample_rate = target_sample_rate
+
+    # Resampling may overshoot [-1, 1]; clip first so the int16 cast cannot
+    # wrap around into loud clicks.
+    audio = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
+
+    # Initialize Opus encoder (mono)
+    encoder = opuslib.Encoder(sample_rate, 1, opuslib.APPLICATION_AUDIO)
+
+    duration = 60  # 60ms every frame
+    frame_size = int(sample_rate * duration / 1000)
+
+    # Zero-pad the tail to a whole number of frames so the last (partial)
+    # frame is encoded instead of being silently dropped.
+    remainder = len(audio) % frame_size
+    if remainder:
+        audio = np.pad(audio, (0, frame_size - remainder))
+
+    # Encode frame by frame and append each packet to the output file.
+    with open(output_file, 'wb') as f:
+        for i in tqdm.tqdm(range(0, len(audio), frame_size)):
+            frame = audio[i:i + frame_size]
+            opus_data = encoder.encode(frame.tobytes(), frame_size=frame_size)
+            # protocol format, [1u type, 1u reserved, 2u len, data]
+            packet = struct.pack('>BBH', 0, 0, len(opus_data)) + opus_data
+            f.write(packet)
+
+def main():
+    """Command-line entry point: convert <input_file> into the P3 file <output_file>.
+
+    Prints a usage line and exits with status 1 unless exactly two
+    command-line arguments are given.
+    """
+    if len(sys.argv) != 3:
+        print('Usage: python convert_audio_to_p3.py <input_file> <output_file>')
+        sys.exit(1)
+
+    encode_audio_to_opus(sys.argv[1], sys.argv[2])
+
+
+# Run the conversion only when executed as a script, so that importing this
+# module does not parse sys.argv or exit the interpreter.
+if __name__ == "__main__":
+    main()
--- a/scripts/p3_tools/play_p3.py
+++ b/scripts/p3_tools/play_p3.py
@@ -0,0 +1,80 @@
+# 播放p3格式的音频文件
+import opuslib
+import struct
+import numpy as np
+import sounddevice as sd
+import time
+import argparse
+
+def _read_p3_packets(f):
+    """Yield the Opus payload of each P3 packet read from binary file *f*.
+
+    Iteration ends at end of file, at a truncated header or payload, or at a
+    packet whose declared payload length is zero.
+    """
+    while True:
+        # Header (4 bytes, big-endian): [type: u8, reserved: u8, length: u16]
+        header = f.read(4)
+        if len(header) < 4:
+            return
+        _packet_type, _reserved, data_len = struct.unpack('>BBH', header)
+
+        opus_data = f.read(data_len)
+        if not opus_data or len(opus_data) < data_len:
+            return
+        yield opus_data
+
+
+def play_p3_file(input_file):
+    """Decode a P3 file and play it on the default audio output device.
+
+    P3 packet layout: [1-byte type, 1-byte reserved, 2-byte length, Opus data].
+    The audio is decoded as 16 kHz mono in 60 ms frames. Half a second of
+    silence is appended after the last frame to avoid a pop at the end.
+    Ctrl+C (KeyboardInterrupt) stops playback early.
+
+    Args:
+        input_file: Path of the P3 file to play.
+    """
+    # Opus decoder: fixed 16000 Hz sample rate, mono
+    sample_rate = 16000
+    channels = 1
+    decoder = opuslib.Decoder(sample_rate, channels)
+
+    # Samples per 60 ms frame
+    frame_size = int(sample_rate * 60 / 1000)
+
+    # Output stream taking 16-bit PCM samples
+    stream = sd.OutputStream(
+        samplerate=sample_rate,
+        channels=channels,
+        dtype='int16'
+    )
+
+    try:
+        # Started inside the try block so the stream is closed even if
+        # starting it fails.
+        stream.start()
+
+        with open(input_file, 'rb') as f:
+            print(f"正在播放: {input_file}")
+
+            for opus_data in _read_p3_packets(f):
+                pcm_data = decoder.decode(opus_data, frame_size)
+                audio_array = np.frombuffer(pcm_data, dtype=np.int16)
+
+                # write() blocks until the samples have been consumed, so it
+                # paces playback by itself. An additional per-frame sleep
+                # would make each iteration longer than the frame it plays
+                # and starve the output buffer.
+                stream.write(audio_array)
+
+            # Append 0.5 s of silence after the last frame to avoid a pop.
+            # stream.stop() below waits for pending buffers to be played,
+            # so no extra sleep is needed here.
+            silence = np.zeros(int(sample_rate / 2), dtype=np.int16)
+            stream.write(silence)
+
+    except KeyboardInterrupt:
+        print("\n播放已停止")
+    finally:
+        stream.stop()
+        stream.close()
+        print("播放完成")
+
+def main():
+    """Read the P3 file path from the command line and play that file."""
+    cli = argparse.ArgumentParser(description='播放p3格式的音频文件')
+    cli.add_argument('input_file', help='输入的p3文件路径')
+    options = cli.parse_args()
+
+    play_p3_file(options.input_file)
+
+# Script entry point: only runs when the file is executed directly.
+if __name__ == "__main__":
+    main()
--- a/scripts/p3_tools/requirements.txt
+++ b/scripts/p3_tools/requirements.txt
@@ -0,0 +1,5 @@
+librosa>=0.9.2
+opuslib>=3.0.1
+numpy>=1.20.0
+tqdm>=4.62.0
+sounddevice>=0.4.4