This commit is contained in:
2025-09-02 06:55:13 +08:00
parent 4078acb909
commit 1b959b3ba9
11 changed files with 1106 additions and 74 deletions

View File

@@ -0,0 +1,284 @@
#coding=utf-8
'''
字节跳动语音合成API封装类
requires Python 3.6 or later
pip install requests
'''
import base64
import json
import uuid
import requests
from typing import Optional, Dict, Any
from pathlib import Path
from Config.Config import HS_APP_ID, HS_ACCESS_TOKEN, HS_CLUSTER_ID, HS_VOICE_TYPE_QINCANG
# ByteDanceTTS keeps its categorized voice catalogue in the class attribute TTS_VOICES.
class ByteDanceTTS:
    """Small client for the ByteDance speech-synthesis (TTS) HTTP endpoint.

    An instance stores the credentials and default voice, builds the JSON
    request, posts it to ``https://openspeech.bytedance.com/api/v1/tts`` and
    base64-decodes the ``data`` field of the response into raw audio bytes.
    Failures are reported with ``print`` and a ``None`` / ``False`` return
    value rather than by raising.
    """

    # Voice catalogue grouped by usage scenario:
    # category name -> {voice_type identifier: human-readable description}.
    TTS_VOICES = {
        "通用场景": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        },
        "有声阅读": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)",
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)"
        },
        "智能助手": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        },
        "视频配音": {
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)",
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        },
        "特色音色": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)"
        },
        "广告配音": {
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)"
        },
        "新闻播报": {
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)",
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)"
        },
        "教育场景": {
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        }
    }

    def __init__(self,
                 app_id: Optional[str] = None,
                 access_token: Optional[str] = None,
                 cluster_id: Optional[str] = None,
                 voice_type: Optional[str] = None):
        """Store credentials and the default voice.

        Any argument that is ``None`` (or otherwise falsy) falls back to the
        matching constant imported from the project configuration.

        Args:
            app_id: Application ID; falls back to ``HS_APP_ID``.
            access_token: Access token; falls back to ``HS_ACCESS_TOKEN``.
            cluster_id: Cluster ID; falls back to ``HS_CLUSTER_ID``.
            voice_type: Default voice; falls back to ``HS_VOICE_TYPE_QINCANG``.
        """
        self.app_id = app_id or HS_APP_ID
        self.access_token = access_token or HS_ACCESS_TOKEN
        self.cluster_id = cluster_id or HS_CLUSTER_ID
        self.voice_type = voice_type or HS_VOICE_TYPE_QINCANG
        self.host = "openspeech.bytedance.com"
        self.api_url = f"https://{self.host}/api/v1/tts"
        # NOTE(review): the separator after "Bearer" is a semicolon, not a
        # space. Kept exactly as written; presumably this is the format the
        # service expects - confirm against the API reference.
        self.header = {"Authorization": f"Bearer;{self.access_token}"}

    def generate_audio(self,
                       text: str,
                       output_path: Optional[str] = None,
                       voice_type: Optional[str] = None,
                       encoding: str = "mp3",
                       speed_ratio: float = 1.0,
                       volume_ratio: float = 1.0,
                       pitch_ratio: float = 1.0,
                       text_type: str = "plain",
                       operation: str = "query",
                       timeout: float = 30.0) -> Optional[bytes]:
        """Synthesize ``text`` and return the decoded audio bytes.

        Args:
            text: Text to convert. Empty or whitespace-only text is rejected
                locally without contacting the service.
            output_path: If given, the audio is also written to this path via
                ``save_audio``. A failed save does not change the return value.
            voice_type: Voice to use for this call; defaults to the instance's
                ``voice_type``.
            encoding: Audio encoding sent in the request (default ``"mp3"``).
            speed_ratio: Speech speed ratio (default 1.0).
            volume_ratio: Volume ratio (default 1.0).
            pitch_ratio: Pitch ratio (default 1.0).
            text_type: Text type sent in the request (default ``"plain"``).
            operation: Operation sent in the request (default ``"query"``).
            timeout: Seconds to wait for the HTTP request before giving up, so
                a stalled connection cannot block the caller indefinitely.

        Returns:
            The audio as ``bytes``, or ``None`` when the input is empty, the
            request fails, or the response carries no ``data`` field.
        """
        if not text or not text.strip():
            print("生成音频失败: 文本内容为空")
            return None

        request_json = {
            "app": {
                "appid": self.app_id,
                # NOTE(review): this is a literal placeholder string, not the
                # real token (which travels in the Authorization header).
                # Left unchanged - confirm against the API reference.
                "token": "access_token",
                "cluster": self.cluster_id
            },
            "user": {
                # A fresh random user ID is generated for every request.
                "uid": str(uuid.uuid4())
            },
            "audio": {
                "voice_type": voice_type or self.voice_type,
                "encoding": encoding,
                "speed_ratio": speed_ratio,
                "volume_ratio": volume_ratio,
                "pitch_ratio": pitch_ratio,
            },
            "request": {
                "reqid": str(uuid.uuid4()),
                "text": text,
                "text_type": text_type,
                "operation": operation,
                "with_frontend": 1,
                "frontend_type": "unitTson"
            }
        }
        try:
            resp = requests.post(self.api_url,
                                 data=json.dumps(request_json),
                                 headers=self.header,
                                 timeout=timeout)
            resp.raise_for_status()
            resp_data = resp.json()
            if "data" not in resp_data:
                print(f"API响应中未包含音频数据: {resp_data}")
                return None
            audio_data = base64.b64decode(resp_data["data"])
            if output_path:
                self.save_audio(audio_data, output_path)
            return audio_data
        except requests.exceptions.RequestException as e:
            print(f"请求失败: {e}")
            return None
        except Exception as e:
            # Boundary handler: this method reports every failure as None.
            print(f"生成音频失败: {e}")
            return None

    def save_audio(self, audio_data: bytes, output_path: str) -> bool:
        """Write ``audio_data`` to ``output_path``, creating parent directories.

        Args:
            audio_data: Raw audio bytes.
            output_path: Destination file path.

        Returns:
            ``True`` if the file was written, ``False`` if any error occurred
            (the error is printed).
        """
        try:
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)
            with open(output_path, "wb") as f:
                f.write(audio_data)
            print(f"音频已保存到: {output_path}")
            return True
        except Exception as e:
            # Boundary handler: failure is signalled through the return value.
            print(f"保存音频失败: {e}")
            return False

    def get_audio_info(self, audio_data: bytes, encoding: str = "mp3") -> Dict[str, Any]:
        """Describe an audio payload.

        Args:
            audio_data: Raw audio bytes.
            encoding: Format label to report. It is echoed back as given and
                is not detected from the bytes; pass the same value that was
                used for ``generate_audio``. Defaults to ``"mp3"``.

        Returns:
            Dict with ``size_bytes``, ``size_kb`` (bytes / 1024) and ``format``.
        """
        size_bytes = len(audio_data)
        return {
            "size_bytes": size_bytes,
            "size_kb": size_bytes / 1024,
            "format": encoding
        }

    def get_voices_by_category(self, category: str) -> Dict[str, str]:
        """Return the voices of one category.

        Args:
            category: Category name (a key of ``TTS_VOICES``).

        Returns:
            A new dict mapping voice_type to description; empty if the category
            is unknown. It is a copy, so mutating it cannot alter the shared
            class-level catalogue.
        """
        return dict(self.TTS_VOICES.get(category, {}))

    def get_all_categories(self) -> list:
        """Return the category names of ``TTS_VOICES`` as a list."""
        return list(self.TTS_VOICES)

    def get_all_voices(self) -> Dict[str, Dict[str, str]]:
        """Return every category with its voices.

        Returns:
            A copy of ``TTS_VOICES`` (both levels copied), so callers may
            modify the result without affecting other users of the class.
        """
        return {category: dict(voices) for category, voices in self.TTS_VOICES.items()}
def main():
    """Demo: synthesize a classical poem to ``test_submit.mp3`` and print its size."""
    # Sample text to convert
    poem = """
君不见,黄河之水天上来,奔流到海不复回。
君不见,高堂明镜悲白发,朝如青丝暮成雪。
人生得意须尽欢,莫使金樽空对月。
天生我材必有用,千金散尽还复来。
烹羊宰牛且为乐,会须一饮三百杯。
岑夫子,丹丘生,将进酒,杯莫停。
与君歌一曲,请君为我倾耳听。
钟鼓馔玉不足贵,但愿长醉不复醒。
古来圣贤皆寂寞,惟有饮者留其名。
陈王昔时宴平乐,斗酒十千恣欢谑。
主人何为言少钱,径须沽取对君酌。
五花马,千金裘,呼儿将出换美酒,与尔同销万古愁。
"""
    synthesizer = ByteDanceTTS()
    # Generate the audio and save it to disk in one call
    audio_bytes = synthesizer.generate_audio(
        text=poem,
        output_path="test_submit.mp3",
        voice_type=HS_VOICE_TYPE_QINCANG,
        speed_ratio=1.0,
        volume_ratio=1.0,
    )
    if not audio_bytes:
        print("音频生成失败")
        return
    # Report the size of the generated audio
    size_kb = synthesizer.get_audio_info(audio_bytes)['size_kb']
    print(f"音频生成成功,大小: {size_kb:.2f} KB")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,211 @@
import requests
import time
from typing import Dict, Optional
from Config import Config
class VideoRetalk:
    """Client for asynchronous portrait-video generation on Alibaba Cloud DashScope.

    Despite the class name, the request built here targets the
    ``liveportrait`` model on the ``image2video/video-synthesis`` endpoint:
    an image URL and an audio URL are submitted as an asynchronous task, the
    task is polled until it finishes, and the resulting video URL is returned.
    """

    def __init__(self, api_key: str, request_timeout: float = 30.0):
        """Store the API key and derive the endpoint URLs.

        Args:
            api_key: DashScope API key, sent as a Bearer token.
            request_timeout: Seconds to wait for each individual HTTP request,
                so a stalled connection cannot hang the caller.
        """
        self.api_key = api_key
        self.request_timeout = request_timeout
        self.base_url = "https://dashscope.aliyuncs.com/api/v1"
        self.video_synthesis_url = f"{self.base_url}/services/aigc/image2video/video-synthesis"

    def submit_video_task(self, image_url: str, audio_url: str,
                          template_id: str = "normal",
                          eye_move_freq: float = 0.5,
                          video_fps: int = 30,
                          mouth_move_strength: float = 1.0,
                          paste_back: bool = True,
                          head_move_strength: float = 0.7) -> Dict:
        """Submit a video generation task.

        Args:
            image_url: URL of the input image.
            audio_url: URL of the input audio.
            template_id: Value sent as ``template_id`` (default ``"normal"``).
            eye_move_freq: Value sent as ``eye_move_freq`` (default 0.5).
            video_fps: Value sent as ``video_fps`` (default 30).
            mouth_move_strength: Value sent as ``mouth_move_strength`` (default 1.0).
            paste_back: Value sent as ``paste_back`` (default True).
            head_move_strength: Value sent as ``head_move_strength`` (default 0.7).

        Returns:
            The decoded JSON response; ``generate_video`` reads
            ``output.task_id`` from it.

        Raises:
            Exception: If the HTTP request fails; the underlying ``requests``
                error is attached as ``__cause__``.
        """
        headers = {
            # Ask the service to run the job asynchronously.
            'X-DashScope-Async': 'enable',
            'Authorization': f'Bearer {self.api_key}',
            'Content-Type': 'application/json'
        }
        payload = {
            "model": "liveportrait",
            "input": {
                "image_url": image_url,
                "audio_url": audio_url
            },
            "parameters": {
                "template_id": template_id,
                "eye_move_freq": eye_move_freq,
                "video_fps": video_fps,
                "mouth_move_strength": mouth_move_strength,
                "paste_back": paste_back,
                "head_move_strength": head_move_strength
            }
        }
        try:
            response = requests.post(self.video_synthesis_url,
                                     headers=headers,
                                     json=payload,
                                     timeout=self.request_timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            raise Exception(f"提交视频任务失败: {e}") from e

    def get_task_status(self, task_id: str) -> Dict:
        """Fetch the current state of a task.

        Args:
            task_id: Task ID returned by ``submit_video_task``.

        Returns:
            The decoded JSON response of ``GET {base_url}/tasks/{task_id}``.

        Raises:
            Exception: If the HTTP request fails; the underlying ``requests``
                error is attached as ``__cause__``.
        """
        headers = {
            'Authorization': f'Bearer {self.api_key}'
        }
        task_url = f"{self.base_url}/tasks/{task_id}"
        try:
            response = requests.get(task_url, headers=headers,
                                    timeout=self.request_timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            raise Exception(f"查询任务状态失败: {e}") from e

    def wait_for_task_completion(self, task_id: str,
                                 poll_interval: int = 5,
                                 timeout: int = 300) -> Dict:
        """Poll a task until it succeeds, fails, or the timeout elapses.

        Args:
            task_id: Task ID to poll.
            poll_interval: Seconds to sleep between polls (default 5).
            timeout: Overall time budget in seconds (default 300).

        Returns:
            The full status response once ``output.task_status`` is
            ``SUCCEEDED``.

        Raises:
            Exception: If the task reports ``FAILED``, reports a status other
                than ``PENDING``/``RUNNING``, or does not finish in time.
        """
        # A monotonic clock keeps the deadline immune to wall-clock adjustments.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            task_status = self.get_task_status(task_id)
            # "or {}" also covers an explicit JSON null for "output".
            output = task_status.get('output') or {}
            status = output.get('task_status')
            if status == 'SUCCEEDED':
                return task_status
            if status == 'FAILED':
                error_code = output.get('code', '未知错误')
                error_message = output.get('message', '无错误信息')
                raise Exception(f"任务执行失败: {error_code} - {error_message}")
            if status not in ('PENDING', 'RUNNING'):
                raise Exception(f"未知的任务状态: {status}")
            print(f"任务状态: {status}, 等待中...")
            # Never sleep past the deadline.
            remaining = deadline - time.monotonic()
            time.sleep(max(0.0, min(poll_interval, remaining)))
        raise Exception(f"任务超时,未在{timeout}秒内完成")

    def generate_video(self, image_url: str, audio_url: str,
                       *, poll_interval: int = 5, timeout: int = 300,
                       **kwargs) -> Optional[str]:
        """Run the whole flow: submit the task, wait for it, return the video URL.

        Args:
            image_url: URL of the input image.
            audio_url: URL of the input audio.
            poll_interval: Forwarded to ``wait_for_task_completion``.
            timeout: Forwarded to ``wait_for_task_completion``.
            **kwargs: Extra keyword arguments forwarded to ``submit_video_task``.

        Returns:
            The generated video URL, or ``None`` on any failure (the reason is
            printed; no exception escapes this method).
        """
        try:
            submit_response = self.submit_video_task(image_url, audio_url, **kwargs)
            task_id = (submit_response.get('output') or {}).get('task_id')
            if not task_id:
                print("提交任务失败未获取到task_id")
                return None
            print(f"任务已提交task_id: {task_id}")

            result = self.wait_for_task_completion(task_id,
                                                   poll_interval=poll_interval,
                                                   timeout=timeout)

            results = (result.get('output') or {}).get('results') or {}
            video_url = results.get('video_url')
            if not video_url:
                print("未找到生成的视频URL")
                return None

            print(f"视频生成成功: {video_url}")
            # Usage details are printed only when the response includes them.
            usage = result.get('usage', {})
            if usage:
                print(f"视频时长: {usage.get('video_duration')}")
                print(f"视频比例: {usage.get('video_ratio')}")
            return video_url
        except Exception as e:
            # Boundary handler: every failure is reported as None.
            print(f"视频生成失败: {e}")
            return None
# Usage example
if __name__ == "__main__":
    # The API key comes from the project configuration
    API_KEY = Config.ALY_LLM_API_KEY
    client = VideoRetalk(API_KEY)

    # Generation parameters, spelled out explicitly for the demo
    generation_options = {
        "template_id": "normal",
        "eye_move_freq": 0.5,
        "video_fps": 30,
        "mouth_move_strength": 1.0,
        "paste_back": True,
        "head_move_strength": 0.7,
    }

    # Example: generate a video from an image and an audio clip
    try:
        final_url = client.generate_video(
            image_url="https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/Backup/LiBai.jpg",
            audio_url="https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/Backup/JiangJinJiu.mp3",
            **generation_options,
        )
        if final_url:
            print(f"最终视频URL: {final_url}")
    except Exception as e:
        print(f"执行过程中发生错误: {e}")