From 5a2e9ac87fb10800467687d310c53452399155ca Mon Sep 17 00:00:00 2001
From: Xiaoxia
Date: Wed, 12 Mar 2025 23:28:20 +0800
Subject: [PATCH] add play_p3 and README

---
 scripts/p3_tools/README.md                    | 61 ++++++++++++++
 scripts/{ => p3_tools}/convert_audio_to_p3.py |  0
 scripts/p3_tools/play_p3.py                   | 80 +++++++++++++++++++
 scripts/p3_tools/requirements.txt             |  5 ++
 4 files changed, 146 insertions(+)
 create mode 100644 scripts/p3_tools/README.md
 rename scripts/{ => p3_tools}/convert_audio_to_p3.py (100%)
 create mode 100644 scripts/p3_tools/play_p3.py
 create mode 100644 scripts/p3_tools/requirements.txt

diff --git a/scripts/p3_tools/README.md b/scripts/p3_tools/README.md
new file mode 100644
index 00000000..d760c100
--- /dev/null
+++ b/scripts/p3_tools/README.md
@@ -0,0 +1,61 @@
+# P3音频格式转换与播放工具
+
+这个目录包含两个用于处理P3格式音频文件的Python脚本:
+
+## 1. 音频转换工具 (convert_audio_to_p3.py)
+
+将普通音频文件转换为P3格式(4字节header + Opus数据包的流式结构)。
+
+### 使用方法
+
+```bash
+python convert_audio_to_p3.py <输入音频文件> <输出P3文件>
+```
+
+例如:
+```bash
+python convert_audio_to_p3.py input.mp3 output.p3
+```
+
+## 2. 
"""Play audio files stored in the P3 format (play_p3.py).

P3 is a simple streaming container: the file is a plain sequence of frames,
each made of a 4-byte header followed by one Opus packet.

    header = [1-byte type, 1-byte reserved, 2-byte big-endian payload length]

The audio is fixed at 16000 Hz, mono, with 60 ms of audio per frame.

Usage::

    python play_p3.py <file.p3>
    python play_p3.py output.p3

Dependencies of the p3_tools scripts (requirements.txt)::

    librosa>=0.9.2
    opuslib>=3.0.1
    numpy>=1.20.0
    tqdm>=4.62.0
    sounddevice>=0.4.4

Install them with ``pip install librosa opuslib numpy tqdm sounddevice`` or
``pip install -r requirements.txt``.
"""
import argparse
import struct

import numpy as np

SAMPLE_RATE = 16000        # Hz, fixed by the P3 format
CHANNELS = 1               # mono
FRAME_DURATION_MS = 60     # duration of one Opus frame
TRAILING_SILENCE_S = 0.5   # silence appended after the last frame

# Frame header: type (uint8), reserved (uint8), payload length (uint16, BE).
_HEADER = struct.Struct('>BBH')


def iter_p3_packets(fileobj):
    """Yield the Opus payload of every complete frame in a P3 stream.

    Args:
        fileobj: Binary file-like object positioned at the start of a frame.

    Yields:
        bytes: One Opus packet per frame, in file order.

    Iteration ends at end of file. A truncated trailing header or payload
    also ends iteration quietly, because the incomplete frame cannot be
    decoded. Frames whose declared length is zero carry no audio and are
    skipped instead of being mistaken for end of file.
    """
    while True:
        header = fileobj.read(_HEADER.size)
        if len(header) < _HEADER.size:
            return  # clean EOF or truncated header

        # Type and reserved bytes are not needed for playback.
        _packet_type, _reserved, data_len = _HEADER.unpack(header)

        payload = fileobj.read(data_len)
        if len(payload) < data_len:
            return  # truncated payload: drop the partial frame

        if payload:
            yield payload


def play_p3_file(input_file):
    """Decode a P3 file and play it on the default audio output device.

    Args:
        input_file: Path of the P3 file to play.

    Raises:
        OSError: If the file cannot be opened.

    Errors raised by the Opus decoder or the audio backend propagate to the
    caller. Ctrl-C stops playback and is reported instead of raising.
    """
    # Imported lazily so that the module (argument parsing, --help and
    # iter_p3_packets) stays usable without the audio backends installed.
    import opuslib
    import sounddevice as sd

    decoder = opuslib.Decoder(SAMPLE_RATE, CHANNELS)
    frame_size = SAMPLE_RATE * FRAME_DURATION_MS // 1000  # samples per frame

    try:
        # Open the file first so a bad path fails before the audio device is
        # claimed. The stream context manager starts the stream on entry and
        # stops and closes it on exit, even on error.
        with open(input_file, 'rb') as f:
            with sd.OutputStream(
                samplerate=SAMPLE_RATE,
                channels=CHANNELS,
                dtype='int16',
            ) as stream:
                print(f"正在播放: {input_file}")

                for opus_data in iter_p3_packets(f):
                    pcm_data = decoder.decode(opus_data, frame_size)
                    # write() blocks until the samples are consumed, which
                    # already paces the loop at real time. An extra sleep
                    # here would let the buffer run dry between frames.
                    stream.write(np.frombuffer(pcm_data, dtype=np.int16))

                # Pad with silence so the last frame is not cut off abruptly.
                # Stopping the stream on exit waits for it to be played.
                silence = np.zeros(
                    int(SAMPLE_RATE * TRAILING_SILENCE_S), dtype=np.int16
                )
                stream.write(silence)
    except KeyboardInterrupt:
        print("\n播放已停止")
    else:
        # Only claim completion when playback really reached the end.
        print("播放完成")


def main():
    """Parse the command line and play the requested P3 file."""
    parser = argparse.ArgumentParser(description='播放p3格式的音频文件')
    parser.add_argument('input_file', help='输入的p3文件路径')
    args = parser.parse_args()

    play_p3_file(args.input_file)


if __name__ == "__main__":
    main()