Files
dsProject/dsLightRag/Util/GengerateAudio.py

319 lines
11 KiB
Python
Raw Normal View History

2025-09-02 06:55:13 +08:00
#coding=utf-8
'''
字节跳动语音合成API封装类
requires Python 3.6 or later
pip install requests
'''
import base64
import json
import uuid
import requests
from typing import Optional, Dict, Any
from pathlib import Path
from Config.Config import HS_APP_ID, HS_ACCESS_TOKEN, HS_CLUSTER_ID, HS_VOICE_TYPE_QINCANG
# ByteDance TTS client; the voice catalogue is kept on the class as TTS_VOICES.
class ByteDanceTTS:
    """
    Wrapper around the ByteDance speech-synthesis (text-to-speech) HTTP API.

    An instance holds the credentials, the default voice and the request
    timeout; ``generate_audio`` performs one synthesis request and returns
    the decoded audio bytes.
    """

    # Voice catalogue: category name -> {voice_type id -> display name}.
    TTS_VOICES = {
        "通用场景": {
            "BV700_V2_streaming": "灿灿 2.0",
            "BV705_streaming": "炀炀",
            "BV701_V2_streaming": "擎苍 2.0",
            "BV001_V2_streaming": "通用女声 2.0",
            "BV700_streaming": "灿灿",
            "BV406_V2_streaming": "超自然音色-梓梓2.0",
            "BV406_streaming": "超自然音色-梓梓",
            "BV407_V2_streaming": "超自然音色-燃燃2.0",
            "BV407_streaming": "超自然音色-燃燃",
            "BV001_streaming": "通用女声12种情感",
            "BV002_streaming": "通用男声"
        },
        "有声阅读": {
            "BV701_streaming": "擎苍",
            "BV123_streaming": "阳光青年",
            "BV120_streaming": "反卷青年",
            "BV119_streaming": "通用赘婿",
            "BV115_streaming": "古风少御",
            "BV107_streaming": "霸气青叔",
            "BV100_streaming": "质朴青年",
            "BV104_streaming": "温柔淑女",
            "BV004_streaming": "开朗青年",
            "BV113_streaming": "甜宠少御",
            "BV102_streaming": "儒雅青年"
        },
        "智能助手": {
            "BV405_streaming": "甜美小源",
            "BV007_streaming": "亲切女声",
            "BV009_streaming": "知性女声",
            "BV419_streaming": "诚诚",
            "BV415_streaming": "童童",
            "BV008_streaming": "亲切男声"
        },
        "视频配音": {
            "BV408_streaming": "译制片男声",
            "BV426_streaming": "懒小羊",
            "BV428_streaming": "清新文艺女声",
            "BV403_streaming": "鸡汤女声",
            "BV158_streaming": "智慧老者",
            "BV157_streaming": "慈爱姥姥",
            "BR001_streaming": "说唱小哥",
            "BV410_streaming": "活力解说男",
            "BV411_streaming": "影视解说小帅",
            "BV437_streaming": "解说小帅-多情感",
            "BV412_streaming": "影视解说小美",
            "BV159_streaming": "纨绔青年",
            "BV418_streaming": "直播一姐",
            "BV142_streaming": "沉稳解说男",
            "BV143_streaming": "潇洒青年",
            "BV056_streaming": "阳光男声",
            "BV005_streaming": "活泼女声",
            "BV064_streaming": "小萝莉"
        },
        "特色音色": {
            "BV051_streaming": "奶气萌娃",
            "BV063_streaming": "动漫海绵",
            "BV417_streaming": "动漫海星",
            "BV050_streaming": "动漫小新",
            "BV061_streaming": "天才童声"
        },
        "广告配音": {
            "BV401_streaming": "促销男声",
            "BV402_streaming": "促销女声",
            "BV006_streaming": "磁性男声"
        },
        "新闻播报": {
            "BV011_streaming": "新闻女声",
            "BV012_streaming": "新闻男声"
        },
        "教育场景": {
            "BV034_streaming": "知性姐姐-双语",
            "BV033_streaming": "温柔小哥"
        }
    }

    def __init__(self,
                 app_id: Optional[str] = None,
                 access_token: Optional[str] = None,
                 cluster_id: Optional[str] = None,
                 voice_type: Optional[str] = None,
                 timeout: Optional[float] = 60.0):
        """
        Initialise the speech-synthesis client.

        Args:
            app_id: Application ID; falls back to Config's HS_APP_ID.
            access_token: Access token; falls back to Config's HS_ACCESS_TOKEN.
            cluster_id: Cluster ID; falls back to Config's HS_CLUSTER_ID.
            voice_type: Default voice; falls back to Config's
                HS_VOICE_TYPE_QINCANG.
            timeout: Seconds to wait for the HTTP request before giving up.
                Pass None to wait indefinitely (the previous behaviour).
        """
        self.app_id = app_id or HS_APP_ID
        self.access_token = access_token or HS_ACCESS_TOKEN
        self.cluster_id = cluster_id or HS_CLUSTER_ID
        self.voice_type = voice_type or HS_VOICE_TYPE_QINCANG
        self.timeout = timeout
        self.host = "openspeech.bytedance.com"
        self.api_url = f"https://{self.host}/api/v1/tts"
        self.header = {"Authorization": f"Bearer;{self.access_token}"}

    def generate_audio(self,
                       text: str,
                       output_path: Optional[str] = None,
                       voice_type: Optional[str] = None,
                       encoding: str = "mp3",
                       speed_ratio: float = 1.0,
                       volume_ratio: float = 1.0,
                       pitch_ratio: float = 1.0,
                       text_type: str = "plain",
                       operation: str = "query") -> Optional[bytes]:
        """
        Synthesise speech for ``text``.

        Args:
            text: Text to convert to speech.
            output_path: If given, the audio is also written to this path.
            voice_type: Voice to use; overrides the instance default.
            encoding: Audio encoding requested from the API (default "mp3").
            speed_ratio: Speech speed ratio (default 1.0).
            volume_ratio: Volume ratio (default 1.0).
            pitch_ratio: Pitch ratio (default 1.0).
            text_type: Text type sent to the API (default "plain").
            operation: Operation type sent to the API (default "query").

        Returns:
            The decoded audio bytes, or None when the text is empty, the
            request fails, or the response carries no "data" field.
        """
        # Nothing to synthesise: skip the network round-trip entirely.
        if not text or not text.strip():
            print("文本为空: 无法生成音频")
            return None

        request_json = {
            "app": {
                "appid": self.app_id,
                # NOTE(review): this sends the literal string "access_token",
                # not self.access_token; the real credential travels in the
                # Authorization header. Presumably a placeholder field --
                # confirm against the API documentation.
                "token": "access_token",
                "cluster": self.cluster_id
            },
            "user": {
                "uid": str(uuid.uuid4())  # random user ID per request
            },
            "audio": {
                "voice_type": voice_type or self.voice_type,
                "encoding": encoding,
                "speed_ratio": speed_ratio,
                "volume_ratio": volume_ratio,
                "pitch_ratio": pitch_ratio,
            },
            "request": {
                "reqid": str(uuid.uuid4()),
                "text": text,
                "text_type": text_type,
                "operation": operation,
                "with_frontend": 1,
                "frontend_type": "unitTson"
            }
        }
        try:
            # A timeout keeps a stalled connection from blocking the caller
            # forever; it surfaces as a RequestException handled below.
            resp = requests.post(self.api_url,
                                 json.dumps(request_json),
                                 headers=self.header,
                                 timeout=self.timeout)
            resp.raise_for_status()
            resp_data = resp.json()
            if "data" in resp_data:
                # The audio payload arrives base64-encoded.
                audio_data = base64.b64decode(resp_data["data"])
                if output_path:
                    self.save_audio(audio_data, output_path)
                return audio_data
            else:
                print(f"API响应中未包含音频数据: {resp_data}")
                return None
        except requests.exceptions.RequestException as e:
            print(f"请求失败: {e}")
            return None
        except Exception as e:
            print(f"生成音频失败: {e}")
            return None

    def save_audio(self, audio_data: bytes, output_path: str) -> bool:
        """
        Write audio bytes to a file, creating parent directories as needed.

        Args:
            audio_data: Raw audio bytes.
            output_path: Destination file path.

        Returns:
            True if the file was written, False if any error occurred.
        """
        try:
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, "wb") as f:
                f.write(audio_data)
            print(f"音频已保存到: {output_path}")
            return True
        except Exception as e:
            print(f"保存音频失败: {e}")
            return False

    def get_audio_info(self, audio_data: bytes, encoding: str = "mp3") -> Dict[str, Any]:
        """
        Describe a chunk of audio data.

        Args:
            audio_data: Raw audio bytes.
            encoding: Encoding label to report; pass the value that was given
                to ``generate_audio`` (default "mp3").

        Returns:
            Dict with "size_bytes", "size_kb" and "format".
        """
        return {
            "size_bytes": len(audio_data),
            "size_kb": len(audio_data) / 1024,
            "format": encoding
        }

    def get_voices_by_category(self, category: str) -> Dict[str, str]:
        """
        Return the voices of one category.

        Args:
            category: Category name (a key of TTS_VOICES).

        Returns:
            A new dict mapping voice_type to display name; empty when the
            category is unknown.
        """
        # Copy so callers cannot mutate the shared class-level catalogue.
        return dict(self.TTS_VOICES.get(category, {}))

    def get_all_categories(self) -> list:
        """
        Return all voice category names.

        Returns:
            List of category names in catalogue order.
        """
        return list(self.TTS_VOICES.keys())

    def get_all_voices(self) -> Dict[str, Dict[str, str]]:
        """
        Return the full voice catalogue.

        Returns:
            A new dict mapping category name to a new
            {voice_type: display name} dict.
        """
        # Copy both levels so callers cannot mutate the shared catalogue.
        return {category: dict(voices) for category, voices in self.TTS_VOICES.items()}
def main():
    """Example usage: synthesise a poem, save it as MP3 and report its size."""
    synthesizer = ByteDanceTTS()

    # The text to convert, one verse per entry; joined below with a leading
    # and a trailing newline.
    verses = (
        "君不见黄河之水天上来奔流到海不复回",
        "君不见高堂明镜悲白发朝如青丝暮成雪",
        "人生得意须尽欢莫使金樽空对月",
        "天生我材必有用千金散尽还复来",
        "烹羊宰牛且为乐会须一饮三百杯",
        "岑夫子丹丘生将进酒杯莫停",
        "与君歌一曲请君为我倾耳听",
        "钟鼓馔玉不足贵但愿长醉不复醒",
        "古来圣贤皆寂寞惟有饮者留其名",
        "陈王昔时宴平乐斗酒十千恣欢谑",
        "主人何为言少钱径须沽取对君酌",
        "五花马千金裘呼儿将出换美酒与尔同销万古愁",
    )
    poem = "\n" + "\n".join(verses) + "\n"

    # Generate the audio and write it to disk in one call.
    clip = synthesizer.generate_audio(
        text=poem,
        output_path="test_submit.mp3",
        voice_type=HS_VOICE_TYPE_QINCANG,
        speed_ratio=1.0,
        volume_ratio=1.0,
    )

    # Guard clause: no (or empty) audio means the synthesis failed.
    if not clip:
        print("音频生成失败")
        return

    details = synthesizer.get_audio_info(clip)
    print(f"音频生成成功,大小: {details['size_kb']:.2f} KB")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()