'commit'
This commit is contained in:
284
dsLightRag/Util/GengerateAudio.py
Normal file
284
dsLightRag/Util/GengerateAudio.py
Normal file
@@ -0,0 +1,284 @@
|
||||
#coding=utf-8
|
||||
|
||||
'''
|
||||
字节跳动语音合成API封装类
|
||||
requires Python 3.6 or later
|
||||
pip install requests
|
||||
'''
|
||||
import base64
|
||||
import json
|
||||
import uuid
|
||||
import requests
|
||||
from typing import Optional, Dict, Any
|
||||
from pathlib import Path
|
||||
|
||||
from Config.Config import HS_APP_ID, HS_ACCESS_TOKEN, HS_CLUSTER_ID, HS_VOICE_TYPE_QINCANG
|
||||
|
||||
|
||||
# 在ByteDanceTTS类中添加以下音色分类字典
|
||||
|
||||
class ByteDanceTTS:
    """
    Wrapper around the ByteDance speech-synthesis (text-to-speech) HTTP API.

    An instance holds the credentials and default voice, builds the JSON
    request, posts it to ``https://openspeech.bytedance.com/api/v1/tts`` and
    base64-decodes the ``data`` field of the JSON response into audio bytes.
    """

    # Seconds to wait on the HTTP request when the caller gives no timeout.
    # Without a timeout ``requests`` may block indefinitely on a stalled socket.
    DEFAULT_TIMEOUT = 30

    # Voice catalogue: category name -> {voice_type id -> human-readable label}.
    # Shared by all instances; accessor methods hand out copies so that callers
    # cannot mutate it by accident.
    TTS_VOICES = {
        "通用场景": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        },
        "有声阅读": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)",
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)"
        },
        "智能助手": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        },
        "视频配音": {
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)",
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        },
        "特色音色": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)"
        },
        "广告配音": {
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)"
        },
        "新闻播报": {
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)",
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)"
        },
        "教育场景": {
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        }
    }

    def __init__(self,
                 app_id: Optional[str] = None,
                 access_token: Optional[str] = None,
                 cluster_id: Optional[str] = None,
                 voice_type: Optional[str] = None):
        """
        Store credentials and derive the endpoint URL and auth header.

        Args:
            app_id: Application id; falls back to ``HS_APP_ID`` from Config.
            access_token: Access token; falls back to ``HS_ACCESS_TOKEN``.
            cluster_id: Cluster id; falls back to ``HS_CLUSTER_ID``.
            voice_type: Default voice; falls back to
                ``HS_VOICE_TYPE_QINCANG``.
        """
        # ``or`` means an empty string is treated like "not provided".
        self.app_id = app_id or HS_APP_ID
        self.access_token = access_token or HS_ACCESS_TOKEN
        self.cluster_id = cluster_id or HS_CLUSTER_ID
        self.voice_type = voice_type or HS_VOICE_TYPE_QINCANG

        self.host = "openspeech.bytedance.com"
        self.api_url = f"https://{self.host}/api/v1/tts"
        # NOTE(review): the "Bearer;<token>" form (semicolon, no space) is kept
        # exactly as written; it appears to be the service's expected format -
        # confirm against the API documentation before "fixing" it.
        self.header = {"Authorization": f"Bearer;{self.access_token}"}

    def generate_audio(self,
                       text: str,
                       output_path: Optional[str] = None,
                       voice_type: Optional[str] = None,
                       encoding: str = "mp3",
                       speed_ratio: float = 1.0,
                       volume_ratio: float = 1.0,
                       pitch_ratio: float = 1.0,
                       text_type: str = "plain",
                       operation: str = "query",
                       timeout: Optional[float] = None) -> Optional[bytes]:
        """
        Synthesize ``text`` into audio.

        Args:
            text: Text to convert to speech.
            output_path: If given, the audio is also written to this path.
            voice_type: Voice id for this call; overrides the instance default.
            encoding: Audio encoding requested from the service.
            speed_ratio: Speech speed multiplier.
            volume_ratio: Volume multiplier.
            pitch_ratio: Pitch multiplier.
            text_type: Text type sent in the request.
            operation: Operation sent in the request.
            timeout: Seconds to wait for the HTTP request; ``None`` uses
                ``DEFAULT_TIMEOUT``.

        Returns:
            The decoded audio bytes, or ``None`` if the request failed, the
            response carried no ``data`` field, or decoding raised.
        """
        request_json = {
            "app": {
                "appid": self.app_id,
                # NOTE(review): literal placeholder, not self.access_token.
                # Authentication is sent via the Authorization header; this
                # looks intentional - confirm against the API documentation.
                "token": "access_token",
                "cluster": self.cluster_id
            },
            "user": {
                # A fresh random user id is generated for every request.
                "uid": str(uuid.uuid4())
            },
            "audio": {
                "voice_type": voice_type or self.voice_type,
                "encoding": encoding,
                "speed_ratio": speed_ratio,
                "volume_ratio": volume_ratio,
                "pitch_ratio": pitch_ratio,
            },
            "request": {
                "reqid": str(uuid.uuid4()),
                "text": text,
                "text_type": text_type,
                "operation": operation,
                "with_frontend": 1,
                "frontend_type": "unitTson"
            }
        }

        effective_timeout = self.DEFAULT_TIMEOUT if timeout is None else timeout

        try:
            # The body is sent as a pre-serialized JSON string, as before.
            resp = requests.post(self.api_url,
                                 json.dumps(request_json),
                                 headers=self.header,
                                 timeout=effective_timeout)
            resp.raise_for_status()

            resp_data = resp.json()

            if "data" in resp_data:
                audio_data = base64.b64decode(resp_data["data"])

                # Persisting is best-effort: a failed save is reported by
                # save_audio but the bytes are still returned to the caller.
                if output_path:
                    self.save_audio(audio_data, output_path)

                return audio_data
            else:
                print(f"API响应中未包含音频数据: {resp_data}")
                return None

        except requests.exceptions.RequestException as e:
            print(f"请求失败: {e}")
            return None
        except Exception as e:
            # Boundary handler: covers malformed JSON and bad base64 payloads.
            print(f"生成音频失败: {e}")
            return None

    def save_audio(self, audio_data: bytes, output_path: str) -> bool:
        """
        Write audio bytes to ``output_path``, creating parent directories.

        Args:
            audio_data: Raw audio bytes.
            output_path: Destination file path.

        Returns:
            ``True`` if the file was written, ``False`` if any error occurred.
        """
        try:
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            with open(output_path, "wb") as f:
                f.write(audio_data)

            print(f"音频已保存到: {output_path}")
            return True

        except Exception as e:
            print(f"保存音频失败: {e}")
            return False

    def get_audio_info(self, audio_data: bytes, encoding: str = "mp3") -> Dict[str, Any]:
        """
        Describe a blob of audio bytes.

        Args:
            audio_data: Raw audio bytes.
            encoding: Format label to report. It is not detected from the
                bytes; pass the same value that was given to
                ``generate_audio``. Defaults to ``"mp3"``.

        Returns:
            Dict with ``size_bytes``, ``size_kb`` and ``format``.
        """
        size = len(audio_data)
        return {
            "size_bytes": size,
            "size_kb": size / 1024,
            "format": encoding
        }

    def get_voices_by_category(self, category: str) -> Dict[str, str]:
        """
        Return the voices registered under ``category``.

        Args:
            category: Category name (a key of ``TTS_VOICES``).

        Returns:
            A new dict mapping voice_type id to its description; empty if the
            category is unknown.
        """
        # Copy so callers cannot mutate the shared class-level catalogue.
        return dict(self.TTS_VOICES.get(category, {}))

    def get_all_categories(self) -> list:
        """
        Return the names of all voice categories.

        Returns:
            List of category names, in definition order.
        """
        return list(self.TTS_VOICES)

    def get_all_voices(self) -> Dict[str, Dict[str, str]]:
        """
        Return the whole voice catalogue.

        Returns:
            A new dict of category name -> {voice_type id -> description};
            inner dicts are copies as well.
        """
        return {category: dict(voices)
                for category, voices in self.TTS_VOICES.items()}
|
||||
|
||||
|
||||
def main():
    """Demo: synthesize a classical poem to ``test_submit.mp3`` and report its size."""
    # Text to synthesize
    poem = """
君不见,黄河之水天上来,奔流到海不复回。
君不见,高堂明镜悲白发,朝如青丝暮成雪。
人生得意须尽欢,莫使金樽空对月。
天生我材必有用,千金散尽还复来。
烹羊宰牛且为乐,会须一饮三百杯。
岑夫子,丹丘生,将进酒,杯莫停。
与君歌一曲,请君为我倾耳听。
钟鼓馔玉不足贵,但愿长醉不复醒。
古来圣贤皆寂寞,惟有饮者留其名。
陈王昔时宴平乐,斗酒十千恣欢谑。
主人何为言少钱,径须沽取对君酌。
五花马,千金裘,呼儿将出换美酒,与尔同销万古愁。
"""

    # Synthesizer configured entirely from Config defaults
    synthesizer = ByteDanceTTS()

    # Generate the audio and save it in one call
    audio = synthesizer.generate_audio(
        text=poem,
        output_path="test_submit.mp3",
        voice_type=HS_VOICE_TYPE_QINCANG,
        speed_ratio=1.0,
        volume_ratio=1.0
    )

    # Guard clause: nothing to report when synthesis failed
    if not audio:
        print("音频生成失败")
        return

    details = synthesizer.get_audio_info(audio)
    print(f"音频生成成功,大小: {details['size_kb']:.2f} KB")


if __name__ == '__main__':
    main()
|
211
dsLightRag/Util/VideoRetalk.py
Normal file
211
dsLightRag/Util/VideoRetalk.py
Normal file
@@ -0,0 +1,211 @@
|
||||
import requests
|
||||
import time
|
||||
from typing import Dict, Optional
|
||||
|
||||
from Config import Config
|
||||
|
||||
|
||||
class VideoRetalk:
    """
    Client for an asynchronous Alibaba Cloud DashScope video-synthesis task.

    The class submits a job with ``"model": "liveportrait"`` to the
    ``image2video/video-synthesis`` endpoint using an image URL and an audio
    URL, polls the task endpoint until the job finishes, and returns the URL
    of the generated video.

    NOTE(review): the class is named VideoRetalk, but the request it sends
    uses the "liveportrait" model with an image input - confirm which product
    is intended.
    """

    def __init__(self, api_key: str, request_timeout: float = 30.0):
        """
        Store the API key and build the endpoint URLs.

        Args:
            api_key: DashScope API key, sent as a Bearer token.
            request_timeout: Seconds to wait on each HTTP request. Without a
                timeout ``requests`` may block indefinitely.
        """
        self.api_key = api_key
        self.request_timeout = request_timeout
        self.base_url = "https://dashscope.aliyuncs.com/api/v1"
        self.video_synthesis_url = f"{self.base_url}/services/aigc/image2video/video-synthesis"

    def submit_video_task(self, image_url: str, audio_url: str,
                          template_id: str = "normal",
                          eye_move_freq: float = 0.5,
                          video_fps: int = 30,
                          mouth_move_strength: float = 1.0,
                          paste_back: bool = True,
                          head_move_strength: float = 0.7) -> Dict:
        """
        Submit an asynchronous video-generation task.

        Args:
            image_url: URL of the input image.
            audio_url: URL of the input audio.
            template_id: Value sent as ``parameters.template_id``.
            eye_move_freq: Value sent as ``parameters.eye_move_freq``.
            video_fps: Value sent as ``parameters.video_fps``.
            mouth_move_strength: Value sent as
                ``parameters.mouth_move_strength``.
            paste_back: Value sent as ``parameters.paste_back``.
            head_move_strength: Value sent as
                ``parameters.head_move_strength``.

        Returns:
            The parsed JSON response; ``generate_video`` reads
            ``output.task_id`` from it.

        Raises:
            Exception: If the HTTP request fails or returns an error status.
        """
        headers = {
            # Request asynchronous processing; the result is fetched by polling.
            'X-DashScope-Async': 'enable',
            'Authorization': f'Bearer {self.api_key}',
            'Content-Type': 'application/json'
        }

        payload = {
            "model": "liveportrait",
            "input": {
                "image_url": image_url,
                "audio_url": audio_url
            },
            "parameters": {
                "template_id": template_id,
                "eye_move_freq": eye_move_freq,
                "video_fps": video_fps,
                "mouth_move_strength": mouth_move_strength,
                "paste_back": paste_back,
                "head_move_strength": head_move_strength
            }
        }

        try:
            response = requests.post(self.video_synthesis_url,
                                     headers=headers,
                                     json=payload,
                                     timeout=self.request_timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            # Chain the cause so the original traceback is preserved.
            raise Exception(f"提交视频任务失败: {e}") from e

    def get_task_status(self, task_id: str) -> Dict:
        """
        Fetch the current state of a task.

        Args:
            task_id: Id returned when the task was submitted.

        Returns:
            The parsed JSON response of the task endpoint.

        Raises:
            Exception: If the HTTP request fails or returns an error status.
        """
        headers = {
            'Authorization': f'Bearer {self.api_key}'
        }

        task_url = f"{self.base_url}/tasks/{task_id}"

        try:
            response = requests.get(task_url,
                                    headers=headers,
                                    timeout=self.request_timeout)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            raise Exception(f"查询任务状态失败: {e}") from e

    def wait_for_task_completion(self, task_id: str,
                                 poll_interval: int = 5,
                                 timeout: int = 300) -> Dict:
        """
        Poll a task until it succeeds, fails, or the timeout elapses.

        Args:
            task_id: Id of the task to wait for.
            poll_interval: Seconds to sleep between polls.
            timeout: Maximum number of seconds to keep polling.

        Returns:
            The task status response once ``output.task_status`` is
            ``SUCCEEDED``.

        Raises:
            Exception: If the task reports ``FAILED``, reports a status other
                than ``PENDING``/``RUNNING``/``SUCCEEDED``, a status query
                fails, or the timeout elapses first.
        """
        # Monotonic clock: the deadline is unaffected by system clock changes.
        deadline = time.monotonic() + timeout

        while time.monotonic() < deadline:
            task_status = self.get_task_status(task_id)
            output = task_status.get('output', {})
            status = output.get('task_status')

            if status == 'SUCCEEDED':
                return task_status
            elif status == 'FAILED':
                error_code = output.get('code', '未知错误')
                error_message = output.get('message', '无错误信息')
                raise Exception(f"任务执行失败: {error_code} - {error_message}")
            elif status in ['PENDING', 'RUNNING']:
                print(f"任务状态: {status}, 等待中...")
                time.sleep(poll_interval)
            else:
                raise Exception(f"未知的任务状态: {status}")

        raise Exception(f"任务超时,未在{timeout}秒内完成")

    def generate_video(self, image_url: str, audio_url: str,
                       **kwargs) -> Optional[str]:
        """
        Run the full flow: submit the task, wait for it, return the video URL.

        Args:
            image_url: URL of the input image.
            audio_url: URL of the input audio.
            **kwargs: Extra keyword arguments forwarded to
                ``submit_video_task``.

        Returns:
            The generated video URL, or ``None`` on any failure (the error is
            printed, not raised).
        """
        try:
            submit_response = self.submit_video_task(image_url, audio_url, **kwargs)
            task_id = submit_response.get('output', {}).get('task_id')

            if not task_id:
                print("提交任务失败,未获取到task_id")
                return None

            print(f"任务已提交,task_id: {task_id}")

            # Polling uses the default interval and timeout.
            result = self.wait_for_task_completion(task_id)

            video_url = result.get('output', {}).get('results', {}).get('video_url')

            if video_url:
                print(f"视频生成成功: {video_url}")
                # Usage details are printed only when the response has them.
                usage = result.get('usage', {})
                if usage:
                    print(f"视频时长: {usage.get('video_duration')}秒")
                    print(f"视频比例: {usage.get('video_ratio')}")
                return video_url
            else:
                print("未找到生成的视频URL")
                return None

        except Exception as e:
            # Boundary handler: every failure in the flow becomes None.
            print(f"视频生成失败: {e}")
            return None
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
    # The API key is read from the project configuration
    API_KEY = Config.ALY_LLM_API_KEY

    # Client used for the demo run
    retalk_client = VideoRetalk(API_KEY)

    # Demo inputs and tuning parameters, gathered in one place
    demo_request = {
        "image_url": "https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/Backup/LiBai.jpg",
        "audio_url": "https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/Backup/JiangJinJiu.mp3",
        "template_id": "normal",
        "eye_move_freq": 0.5,
        "video_fps": 30,
        "mouth_move_strength": 1.0,
        "paste_back": True,
        "head_move_strength": 0.7,
    }

    try:
        result_url = retalk_client.generate_video(**demo_request)

        if result_url:
            print(f"最终视频URL: {result_url}")

    except Exception as e:
        print(f"执行过程中发生错误: {e}")
|
BIN
dsLightRag/Util/__pycache__/GengerateAudio.cpython-310.pyc
Normal file
BIN
dsLightRag/Util/__pycache__/GengerateAudio.cpython-310.pyc
Normal file
Binary file not shown.
BIN
dsLightRag/Util/__pycache__/VideoRetalk.cpython-310.pyc
Normal file
BIN
dsLightRag/Util/__pycache__/VideoRetalk.cpython-310.pyc
Normal file
Binary file not shown.
Reference in New Issue
Block a user