# coding=utf-8
'''
Wrapper class for the ByteDance speech-synthesis (text-to-speech) API.

requires Python 3.6 or later
pip install requests
'''
|
|||
|
import base64
|
|||
|
import json
|
|||
|
import uuid
|
|||
|
import requests
|
|||
|
from typing import Optional, Dict, Any
|
|||
|
from pathlib import Path
|
|||
|
|
|||
|
from Config.Config import HS_APP_ID, HS_ACCESS_TOKEN, HS_CLUSTER_ID, HS_VOICE_TYPE_QINCANG
|
|||
|
|
|||
|
|
|||
|
# The voice catalogue is declared as the TTS_VOICES dictionary on the ByteDanceTTS class.
|
|||
|
|
|||
|
class ByteDanceTTS:
    """Client wrapper for the ByteDance text-to-speech HTTP API.

    The class builds the JSON request, posts it to
    ``https://openspeech.bytedance.com/api/v1/tts``, base64-decodes the
    ``data`` field of the JSON response and can optionally write the decoded
    audio to disk. It also exposes a static catalogue of voice identifiers
    grouped by usage scenario (``TTS_VOICES``).
    """

    # Voice catalogue: scenario category -> {voice_type id: display description}.
    # The same voice id may appear under several categories.
    TTS_VOICES = {
        "通用场景": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        },
        "有声阅读": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)",
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)"
        },
        "智能助手": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        },
        "视频配音": {
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)",
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        },
        "特色音色": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)"
        },
        "广告配音": {
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)"
        },
        "新闻播报": {
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)",
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)"
        },
        "教育场景": {
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        }
    }

    def __init__(self,
                 app_id: Optional[str] = None,
                 access_token: Optional[str] = None,
                 cluster_id: Optional[str] = None,
                 voice_type: Optional[str] = None):
        """Store credentials and derive the endpoint URL and auth header.

        Any argument that is ``None`` (or otherwise falsy, e.g. ``""``) falls
        back to the corresponding constant imported from ``Config.Config``.

        Args:
            app_id: Application id; defaults to ``HS_APP_ID``.
            access_token: Access token; defaults to ``HS_ACCESS_TOKEN``.
            cluster_id: Cluster id; defaults to ``HS_CLUSTER_ID``.
            voice_type: Default voice id; defaults to ``HS_VOICE_TYPE_QINCANG``.
        """
        self.app_id = app_id or HS_APP_ID
        self.access_token = access_token or HS_ACCESS_TOKEN
        self.cluster_id = cluster_id or HS_CLUSTER_ID
        self.voice_type = voice_type or HS_VOICE_TYPE_QINCANG

        self.host = "openspeech.bytedance.com"
        self.api_url = f"https://{self.host}/api/v1/tts"
        # NOTE: the scheme and token are joined with ';' (not a space). This is
        # kept exactly as in the original code; see the vendor API docs before
        # "normalizing" it.
        self.header = {"Authorization": f"Bearer;{self.access_token}"}

    def _build_request_payload(self,
                               text: str,
                               voice_type: Optional[str],
                               encoding: str,
                               speed_ratio: float,
                               volume_ratio: float,
                               pitch_ratio: float,
                               text_type: str,
                               operation: str) -> Dict[str, Any]:
        """Assemble the JSON-serializable request body for one synthesis call."""
        return {
            "app": {
                "appid": self.app_id,
                # Sent as a literal placeholder string, exactly as in the
                # original code; the real credential travels in the
                # Authorization header. NOTE(review): confirm against API docs.
                "token": "access_token",
                "cluster": self.cluster_id
            },
            "user": {
                "uid": str(uuid.uuid4())  # random user id per request
            },
            "audio": {
                # A per-call voice overrides the instance default.
                "voice_type": voice_type or self.voice_type,
                "encoding": encoding,
                "speed_ratio": speed_ratio,
                "volume_ratio": volume_ratio,
                "pitch_ratio": pitch_ratio,
            },
            "request": {
                "reqid": str(uuid.uuid4()),  # unique id per request
                "text": text,
                "text_type": text_type,
                "operation": operation,
                "with_frontend": 1,
                "frontend_type": "unitTson"
            }
        }

    def generate_audio(self,
                       text: str,
                       output_path: Optional[str] = None,
                       voice_type: Optional[str] = None,
                       encoding: str = "mp3",
                       speed_ratio: float = 1.0,
                       volume_ratio: float = 1.0,
                       pitch_ratio: float = 1.0,
                       text_type: str = "plain",
                       operation: str = "query",
                       timeout: float = 30.0) -> Optional[bytes]:
        """Synthesize speech for ``text`` and return the decoded audio bytes.

        Args:
            text: Text to convert. Empty or whitespace-only text is rejected
                locally without contacting the API.
            output_path: If given, the audio is also written to this path.
            voice_type: Voice id for this call; overrides the instance default.
            encoding: Audio encoding requested from the API (default ``mp3``).
            speed_ratio: Speech speed ratio (default 1.0).
            volume_ratio: Volume ratio (default 1.0).
            pitch_ratio: Pitch ratio (default 1.0).
            text_type: Text type sent to the API (default ``plain``).
            operation: Operation type sent to the API (default ``query``).
            timeout: Seconds to wait for the HTTP request before giving up, so
                a stalled connection cannot block the caller forever.

        Returns:
            The audio as ``bytes``, or ``None`` on any failure (invalid text,
            HTTP/network error, response without audio data). Failures are
            reported with ``print`` and never raised.
        """
        # Reject unusable input up front instead of spending a network call.
        if not isinstance(text, str) or not text.strip():
            print("文本内容为空,无法生成音频")
            return None

        request_json = self._build_request_payload(
            text, voice_type, encoding, speed_ratio,
            volume_ratio, pitch_ratio, text_type, operation,
        )

        try:
            resp = requests.post(
                self.api_url,
                data=json.dumps(request_json),
                headers=self.header,
                timeout=timeout,
            )
            resp.raise_for_status()
            resp_data = resp.json()

            encoded = resp_data.get("data") if isinstance(resp_data, dict) else None
            # A missing or empty payload means no audio was produced.
            if not encoded:
                print(f"API响应中未包含音频数据: {resp_data}")
                return None

            audio_data = base64.b64decode(encoded)

            # Saving is best-effort: the audio is returned even if the write
            # fails (save_audio reports its own error).
            if output_path:
                self.save_audio(audio_data, output_path)

            return audio_data

        except requests.exceptions.RequestException as e:
            print(f"请求失败: {e}")
            return None
        except Exception as e:
            print(f"生成音频失败: {e}")
            return None

    def save_audio(self, audio_data: bytes, output_path: str) -> bool:
        """Write ``audio_data`` to ``output_path``, creating parent directories.

        Args:
            audio_data: Raw audio bytes.
            output_path: Destination file path.

        Returns:
            ``True`` if the file was written, ``False`` if any error occurred
            (the error is printed, not raised).
        """
        try:
            # Make sure the destination directory exists.
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            with open(output_path, "wb") as f:
                f.write(audio_data)

            print(f"音频已保存到: {output_path}")
            return True

        except Exception as e:
            print(f"保存音频失败: {e}")
            return False

    def get_audio_info(self, audio_data: bytes, encoding: str = "mp3") -> Dict[str, Any]:
        """Describe a blob of audio data.

        Args:
            audio_data: Raw audio bytes.
            encoding: Format label to report; pass the same value that was
                given to ``generate_audio``. Defaults to ``"mp3"``. The bytes
                themselves are not inspected.

        Returns:
            Dict with ``size_bytes``, ``size_kb`` (bytes / 1024) and ``format``.
        """
        size = len(audio_data)
        return {
            "size_bytes": size,
            "size_kb": size / 1024,
            "format": encoding
        }

    def get_voices_by_category(self, category: str) -> Dict[str, str]:
        """Return the voices registered under ``category``.

        Args:
            category: Category name (a key of ``TTS_VOICES``).

        Returns:
            A new dict mapping voice_type id to description; empty if the
            category is unknown. Mutating it does not affect the catalogue.
        """
        return dict(self.TTS_VOICES.get(category, {}))

    def get_all_categories(self) -> list:
        """Return the names of all voice categories as a list."""
        return list(self.TTS_VOICES)

    def get_all_voices(self) -> Dict[str, Dict[str, str]]:
        """Return the full catalogue: category -> {voice_type id: description}.

        The result is a copy (outer and inner dicts), so callers can modify it
        without altering the class-level ``TTS_VOICES``.
        """
        return {name: dict(voices) for name, voices in self.TTS_VOICES.items()}
|
|||
|
|
|||
|
|
|||
|
def main():
    """Example usage: synthesize a classical Chinese poem and save it as MP3."""
    # Text to convert.
    poem = """
    君不见,黄河之水天上来,奔流到海不复回。
    君不见,高堂明镜悲白发,朝如青丝暮成雪。
    人生得意须尽欢,莫使金樽空对月。
    天生我材必有用,千金散尽还复来。
    烹羊宰牛且为乐,会须一饮三百杯。
    岑夫子,丹丘生,将进酒,杯莫停。
    与君歌一曲,请君为我倾耳听。
    钟鼓馔玉不足贵,但愿长醉不复醒。
    古来圣贤皆寂寞,惟有饮者留其名。
    陈王昔时宴平乐,斗酒十千恣欢谑。
    主人何为言少钱,径须沽取对君酌。
    五花马,千金裘,呼儿将出换美酒,与尔同销万古愁。
    """

    # Client configured entirely from the Config defaults.
    synthesizer = ByteDanceTTS()

    # Generate the audio and write it next to the script.
    audio = synthesizer.generate_audio(
        text=poem,
        output_path="test_submit.mp3",
        voice_type=HS_VOICE_TYPE_QINCANG,
        speed_ratio=1.0,
        volume_ratio=1.0,
    )

    # Guard clause: nothing to report when synthesis failed.
    if not audio:
        print("音频生成失败")
        return

    summary = synthesizer.get_audio_info(audio)
    print(f"音频生成成功,大小: {summary['size_kb']:.2f} KB")
|
|||
|
|
|||
|
|
|||
|
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|