2025-09-02 06:55:13 +08:00
|
|
|
|
#coding=utf-8
|
|
|
|
|
|
|
|
|
|
'''
|
|
|
|
|
字节跳动语音合成API封装类
|
|
|
|
|
requires Python 3.6 or later
|
|
|
|
|
pip install requests
|
|
|
|
|
'''
|
|
|
|
|
import base64
|
|
|
|
|
import json
|
|
|
|
|
import uuid
|
|
|
|
|
import requests
|
|
|
|
|
from typing import Optional, Dict, Any
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
from Config.Config import HS_APP_ID, HS_ACCESS_TOKEN, HS_CLUSTER_ID, HS_VOICE_TYPE_QINCANG
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# NOTE: the voice-category dictionary lives inside the ByteDanceTTS class as TTS_VOICES.
|
|
|
|
|
|
|
|
|
|
class ByteDanceTTS:
    """
    Thin wrapper around the ByteDance speech-synthesis (TTS) HTTP API.

    Provides text-to-speech conversion plus a small catalogue of the
    available voices, grouped by usage category.
    """

    # Voice catalogue: category name -> {voice_type id: human-readable voice name}.
    TTS_VOICES = {
        "通用场景": {
            "BV700_V2_streaming": "灿灿 2.0",
            "BV705_streaming": "炀炀",
            "BV701_V2_streaming": "擎苍 2.0",
            "BV001_V2_streaming": "通用女声 2.0",
            "BV700_streaming": "灿灿",
            "BV406_V2_streaming": "超自然音色-梓梓2.0",
            "BV406_streaming": "超自然音色-梓梓",
            "BV407_V2_streaming": "超自然音色-燃燃2.0",
            "BV407_streaming": "超自然音色-燃燃",
            "BV001_streaming": "通用女声(12种情感)",
            "BV002_streaming": "通用男声"
        },
        "有声阅读": {
            "BV701_streaming": "擎苍",
            "BV123_streaming": "阳光青年",
            "BV120_streaming": "反卷青年",
            "BV119_streaming": "通用赘婿",
            "BV115_streaming": "古风少御",
            "BV107_streaming": "霸气青叔",
            "BV100_streaming": "质朴青年",
            "BV104_streaming": "温柔淑女",
            "BV004_streaming": "开朗青年",
            "BV113_streaming": "甜宠少御",
            "BV102_streaming": "儒雅青年"
        },
        "智能助手": {
            "BV405_streaming": "甜美小源",
            "BV007_streaming": "亲切女声",
            "BV009_streaming": "知性女声",
            "BV419_streaming": "诚诚",
            "BV415_streaming": "童童",
            "BV008_streaming": "亲切男声"
        },
        "视频配音": {
            "BV408_streaming": "译制片男声",
            "BV426_streaming": "懒小羊",
            "BV428_streaming": "清新文艺女声",
            "BV403_streaming": "鸡汤女声",
            "BV158_streaming": "智慧老者",
            "BV157_streaming": "慈爱姥姥",
            "BR001_streaming": "说唱小哥",
            "BV410_streaming": "活力解说男",
            "BV411_streaming": "影视解说小帅",
            "BV437_streaming": "解说小帅-多情感",
            "BV412_streaming": "影视解说小美",
            "BV159_streaming": "纨绔青年",
            "BV418_streaming": "直播一姐",
            "BV142_streaming": "沉稳解说男",
            "BV143_streaming": "潇洒青年",
            "BV056_streaming": "阳光男声",
            "BV005_streaming": "活泼女声",
            "BV064_streaming": "小萝莉"
        },
        "特色音色": {
            "BV051_streaming": "奶气萌娃",
            "BV063_streaming": "动漫海绵",
            "BV417_streaming": "动漫海星",
            "BV050_streaming": "动漫小新",
            "BV061_streaming": "天才童声"
        },
        "广告配音": {
            "BV401_streaming": "促销男声",
            "BV402_streaming": "促销女声",
            "BV006_streaming": "磁性男声"
        },
        "新闻播报": {
            "BV011_streaming": "新闻女声",
            "BV012_streaming": "新闻男声"
        },
        "教育场景": {
            "BV034_streaming": "知性姐姐-双语",
            "BV033_streaming": "温柔小哥"
        }
    }

    def __init__(self,
                 app_id: Optional[str] = None,
                 access_token: Optional[str] = None,
                 cluster_id: Optional[str] = None,
                 voice_type: Optional[str] = None,
                 timeout: float = 30.0):
        """
        Initialise the speech-synthesis client.

        Args:
            app_id: Application ID; falls back to HS_APP_ID from Config.
            access_token: Access token; falls back to HS_ACCESS_TOKEN from Config.
            cluster_id: Cluster ID; falls back to HS_CLUSTER_ID from Config.
            voice_type: Default voice; falls back to HS_VOICE_TYPE_QINCANG from Config.
            timeout: Seconds to wait for the HTTP request before giving up.
                Without a timeout a stalled connection would block forever.
        """
        self.app_id = app_id or HS_APP_ID
        self.access_token = access_token or HS_ACCESS_TOKEN
        self.cluster_id = cluster_id or HS_CLUSTER_ID
        self.voice_type = voice_type or HS_VOICE_TYPE_QINCANG
        self.timeout = timeout

        self.host = "openspeech.bytedance.com"
        self.api_url = f"https://{self.host}/api/v1/tts"
        # The token is sent in the Authorization header as "Bearer;<token>".
        self.header = {"Authorization": f"Bearer;{self.access_token}"}

    def _build_request(self,
                       text: str,
                       voice_type: Optional[str],
                       encoding: str,
                       speed_ratio: float,
                       volume_ratio: float,
                       pitch_ratio: float,
                       text_type: str,
                       operation: str) -> Dict[str, Any]:
        """Assemble the JSON payload for a single synthesis request."""
        return {
            "app": {
                "appid": self.app_id,
                # NOTE(review): this sends the literal placeholder string, not
                # self.access_token; the real credential appears to travel in
                # the Authorization header - confirm against the API docs
                # before changing it.
                "token": "access_token",
                "cluster": self.cluster_id
            },
            "user": {
                "uid": str(uuid.uuid4())  # random user ID per request
            },
            "audio": {
                "voice_type": voice_type or self.voice_type,
                "encoding": encoding,
                "speed_ratio": speed_ratio,
                "volume_ratio": volume_ratio,
                "pitch_ratio": pitch_ratio,
            },
            "request": {
                "reqid": str(uuid.uuid4()),
                "text": text,
                "text_type": text_type,
                "operation": operation,
                "with_frontend": 1,
                "frontend_type": "unitTson"
            }
        }

    def generate_audio(self,
                       text: str,
                       output_path: Optional[str] = None,
                       voice_type: Optional[str] = None,
                       encoding: str = "mp3",
                       speed_ratio: float = 1.0,
                       volume_ratio: float = 1.0,
                       pitch_ratio: float = 1.0,
                       text_type: str = "plain",
                       operation: str = "query") -> Optional[bytes]:
        """
        Synthesize speech for the given text.

        Args:
            text: Text to convert to speech.
            output_path: If given, the audio is also written to this file.
            voice_type: Voice to use; overrides the instance default.
            encoding: Audio encoding, "mp3" by default.
            speed_ratio: Speaking-rate ratio, 1.0 by default.
            volume_ratio: Volume ratio, 1.0 by default.
            pitch_ratio: Pitch ratio, 1.0 by default.
            text_type: Text type, "plain" by default.
            operation: Operation type, "query" by default.

        Returns:
            The decoded audio bytes, or None when the text is empty, the
            request fails, or the response carries no audio data.
        """
        # Nothing to synthesize: skip the network round-trip entirely.
        if not text or not text.strip():
            print("文本内容为空,无法生成音频")
            return None

        request_json = self._build_request(
            text, voice_type, encoding,
            speed_ratio, volume_ratio, pitch_ratio,
            text_type, operation,
        )

        try:
            resp = requests.post(
                self.api_url,
                data=json.dumps(request_json),
                headers=self.header,
                timeout=self.timeout,
            )
            resp.raise_for_status()

            resp_data = resp.json()
            if "data" not in resp_data:
                print(f"API响应中未包含音频数据: {resp_data}")
                return None

            # The audio payload arrives base64-encoded in the "data" field.
            audio_data = base64.b64decode(resp_data["data"])
        except requests.exceptions.RequestException as e:
            print(f"请求失败: {e}")
            return None
        except Exception as e:
            # Best-effort API: malformed JSON / base64 etc. yield None.
            print(f"生成音频失败: {e}")
            return None

        # save_audio reports its own failures; the bytes are returned regardless.
        if output_path:
            self.save_audio(audio_data, output_path)

        return audio_data

    def save_audio(self, audio_data: bytes, output_path: str) -> bool:
        """
        Write audio bytes to a file, creating parent directories as needed.

        Args:
            audio_data: Raw audio bytes.
            output_path: Destination file path.

        Returns:
            True if the file was written, False if any error occurred.
        """
        try:
            # Make sure the target directory exists.
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            with open(output_path, "wb") as f:
                f.write(audio_data)

            print(f"音频已保存到: {output_path}")
            return True

        except Exception as e:
            print(f"保存音频失败: {e}")
            return False

    def get_audio_info(self, audio_data: bytes, encoding: str = "mp3") -> Dict[str, Any]:
        """
        Describe a chunk of audio data.

        Args:
            audio_data: Raw audio bytes.
            encoding: Encoding the audio was requested with; defaults to
                "mp3", the default encoding of generate_audio.

        Returns:
            Dict with "size_bytes", "size_kb" and "format".
        """
        size_bytes = len(audio_data)
        return {
            "size_bytes": size_bytes,
            "size_kb": size_bytes / 1024,
            "format": encoding
        }

    def get_voices_by_category(self, category: str) -> Dict[str, str]:
        """
        Look up the voices of one category.

        Args:
            category: Category name (a key of TTS_VOICES).

        Returns:
            A copy of the category's {voice_type: description} mapping, or an
            empty dict for an unknown category.
        """
        # Copy so callers cannot mutate the shared class-level catalogue.
        return dict(self.TTS_VOICES.get(category, {}))

    def get_all_categories(self) -> list:
        """
        List every voice category.

        Returns:
            Category names in catalogue order.
        """
        return list(self.TTS_VOICES.keys())

    def get_all_voices(self) -> Dict[str, Dict[str, str]]:
        """
        Return the whole voice catalogue.

        Returns:
            A copy of the {category: {voice_type: description}} mapping.
        """
        # Copy both levels so callers cannot mutate the shared catalogue.
        return {category: dict(voices) for category, voices in self.TTS_VOICES.items()}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Example usage: synthesize a classical poem and report the result."""
    # Text to convert.
    poem = """
君不见,黄河之水天上来,奔流到海不复回。
君不见,高堂明镜悲白发,朝如青丝暮成雪。
人生得意须尽欢,莫使金樽空对月。
天生我材必有用,千金散尽还复来。
烹羊宰牛且为乐,会须一饮三百杯。
岑夫子,丹丘生,将进酒,杯莫停。
与君歌一曲,请君为我倾耳听。
钟鼓馔玉不足贵,但愿长醉不复醒。
古来圣贤皆寂寞,惟有饮者留其名。
陈王昔时宴平乐,斗酒十千恣欢谑。
主人何为言少钱,径须沽取对君酌。
五花马,千金裘,呼儿将出换美酒,与尔同销万古愁。
"""

    # Client configured entirely from the Config defaults.
    synthesizer = ByteDanceTTS()

    # Generate the audio and save it to disk in one call.
    audio = synthesizer.generate_audio(
        text=poem,
        output_path="test_submit.mp3",
        voice_type=HS_VOICE_TYPE_QINCANG,
        speed_ratio=1.0,
        volume_ratio=1.0,
    )

    # Guard clause: bail out early when synthesis produced nothing.
    if not audio:
        print("音频生成失败")
        return

    size_kb = synthesizer.get_audio_info(audio)['size_kb']
    print(f"音频生成成功,大小: {size_kb:.2f} KB")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|