'commit'
This commit is contained in:
132
dsLightRag/Routes/VideoRetalkRoute.py
Normal file
132
dsLightRag/Routes/VideoRetalkRoute.py
Normal file
@@ -0,0 +1,132 @@
|
||||
import datetime
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Query, Request
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydantic import BaseModel
|
||||
|
||||
from Config import Config
|
||||
from Util.VideoRetalk import VideoRetalk
|
||||
|
||||
# 创建视频生成路由
|
||||
router = APIRouter(prefix="/api/video", tags=["视频生成"])
|
||||
|
||||
# 配置日志
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 仅保留视频相关模型定义
|
||||
class VideoRetalkRequest(BaseModel):
    """Request payload for the video generation endpoint.

    Only the two source URLs are required; every tuning field falls back to
    the default declared below.
    """

    # Required inputs: the portrait image and the speech audio to lip-sync.
    image_url: str
    audio_url: str

    # Optional tuning parameters with their defaults.
    template_id: Optional[str] = "normal"
    eye_move_freq: Optional[float] = 0.5
    video_fps: Optional[int] = 30
    mouth_move_strength: Optional[float] = 1.0
    paste_back: Optional[bool] = True
    head_move_strength: Optional[float] = 0.7
|
||||
|
||||
|
||||
class VideoRetalkResponse(BaseModel):
    """Response body of the video generation endpoint."""

    # Outcome flag and human-readable status text; always present.
    success: bool
    message: str

    # Result details; each one defaults to None when not supplied.
    task_id: Optional[str] = None
    video_url: Optional[str] = None
    video_duration: Optional[float] = None
    video_ratio: Optional[str] = None
    request_id: Optional[str] = None
|
||||
|
||||
|
||||
@router.post("/generate", response_model=VideoRetalkResponse)
async def generate_video(request: VideoRetalkRequest):
    """Generate a lip-synced talking video from a portrait image and an audio clip.

    Args:
        request: Source URLs plus optional tuning parameters.

    Returns:
        A ``VideoRetalkResponse`` with ``success=True`` and the video URL when
        the backend returned one, otherwise ``success=False``.

    Raises:
        HTTPException: Status 500 when the backend call (or building the
            response) raises.
    """
    # Local import keeps this handler independent of the file-level imports.
    from fastapi.concurrency import run_in_threadpool

    try:
        video_retalk = VideoRetalk(Config.ALY_LLM_API_KEY)

        # generate_video is a synchronous call; running it in the worker thread
        # pool keeps a slow generation from stalling the event loop.
        video_url = await run_in_threadpool(
            video_retalk.generate_video,
            image_url=request.image_url,
            audio_url=request.audio_url,
            template_id=request.template_id,
            eye_move_freq=request.eye_move_freq,
            video_fps=request.video_fps,
            mouth_move_strength=request.mouth_move_strength,
            paste_back=request.paste_back,
            head_move_strength=request.head_move_strength,
        )

        if not video_url:
            return VideoRetalkResponse(success=False, message="视频生成失败")

        return VideoRetalkResponse(
            success=True,
            message="视频生成成功",
            video_url=video_url,
            # TODO: the fields below are placeholders generated locally; they
            # should be taken from the backend API response instead.
            task_id=str(uuid.uuid4()),
            video_duration=10.23,  # sample value, not the real duration
            video_ratio="standard",  # sample value, not the real ratio
            request_id=str(uuid.uuid4()),
        )
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception("视频生成接口错误: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"视频生成失败: {str(e)}",
        ) from e
|
||||
|
||||
|
||||
@router.get("/task/status")
async def get_task_status(task_id: str = Query(..., description="任务ID")):
    """Look up the status of a video generation task.

    Args:
        task_id: Identifier of the task to query (required query parameter).

    Returns:
        ``{"success": True, "data": <status>}`` where ``<status>`` is whatever
        ``VideoRetalk.get_task_status`` returned.

    Raises:
        HTTPException: Status 500 when the backend call raises.
    """
    # Local import keeps this handler independent of the file-level imports.
    from fastapi.concurrency import run_in_threadpool

    try:
        video_retalk = VideoRetalk(Config.ALY_LLM_API_KEY)
        # Synchronous backend call: run it off the event loop.
        task_status = await run_in_threadpool(video_retalk.get_task_status, task_id)
        return {"success": True, "data": task_status}
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception("查询任务状态错误: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"查询任务状态失败: {str(e)}",
        ) from e
|
||||
|
||||
|
||||
@router.get("/health")
async def health_check():
    """Health probe: report a fixed status, the current time and the service name."""
    # datetime.now() without a tz argument yields a naive local timestamp.
    now_iso = datetime.datetime.now().isoformat()
    payload = {
        "status": "healthy",
        "timestamp": now_iso,
        "service": "VideoRetalk API",
    }
    return payload
|
||||
|
||||
|
||||
# 保留全局异常处理
|
||||
def global_exception_handler(request: Request, exc: Exception):
    """Log an exception and answer with a JSON 500 body describing it.

    NOTE(review): no code visible here registers this handler with an
    application — confirm it is wired up where the app is created.
    """
    logger.error(f"全局异常: {exc}")
    body = {"success": False, "message": f"服务器内部错误: {str(exc)}"}
    return JSONResponse(status_code=500, content=body)
|
||||
|
||||
|
BIN
dsLightRag/Routes/__pycache__/VideoRetalkRoute.cpython-310.pyc
Normal file
BIN
dsLightRag/Routes/__pycache__/VideoRetalkRoute.cpython-310.pyc
Normal file
Binary file not shown.
BIN
dsLightRag/Routes/__pycache__/ttsRoute.cpython-310.pyc
Normal file
BIN
dsLightRag/Routes/__pycache__/ttsRoute.cpython-310.pyc
Normal file
Binary file not shown.
145
dsLightRag/Routes/ttsRoute.py
Normal file
145
dsLightRag/Routes/ttsRoute.py
Normal file
@@ -0,0 +1,145 @@
|
||||
import logging
|
||||
import uuid
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from pydantic import BaseModel
|
||||
|
||||
from Util.GengerateAudio import ByteDanceTTS
|
||||
|
||||
# 创建声音生成路由
|
||||
router = APIRouter(prefix="/api/tts", tags=["声音生成"])
|
||||
|
||||
# 配置日志
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 初始化TTS实例
|
||||
tts_instance = ByteDanceTTS()
|
||||
|
||||
class TextToSpeechRequest(BaseModel):
    """Request payload for the text-to-speech endpoint."""

    # Text to synthesize (required).
    text: str

    # Voice identifier; None lets the TTS wrapper use its own default voice.
    voice_type: Optional[str] = None

    # Playback ratios, 1.0 being the neutral setting.
    speed_ratio: Optional[float] = 1.0
    volume_ratio: Optional[float] = 1.0
    pitch_ratio: Optional[float] = 1.0

    # Audio encoding requested from the TTS backend.
    encoding: Optional[str] = "mp3"
|
||||
|
||||
|
||||
class TextToSpeechResponse(BaseModel):
    """Response body of the text-to-speech endpoint."""

    # Outcome flag and human-readable status text; always present.
    success: bool
    message: str

    # Result details; each one defaults to None when not supplied.
    audio_url: Optional[str] = None
    audio_size: Optional[float] = None
    audio_format: Optional[str] = None
    request_id: Optional[str] = None
|
||||
|
||||
|
||||
@router.get("/voices/categories")
async def get_voice_categories():
    """List every available voice category.

    Returns:
        ``{"success": True, "data": [...], "message": ...}`` with the category
        names reported by the shared TTS instance.

    Raises:
        HTTPException: Status 500 when the lookup raises.
    """
    try:
        category_names = tts_instance.get_all_categories()
    except Exception as e:
        logger.error(f"获取音色分类错误: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"获取音色分类失败: {str(e)}"
        )

    return {
        "success": True,
        "data": category_names,
        "message": "获取音色分类成功",
    }
|
||||
|
||||
|
||||
@router.get("/voices/by-category/{category}")
async def get_voices_by_category(category: str):
    """List the voices that belong to one category.

    Args:
        category: Name of the voice category (path parameter).

    Returns:
        A success payload carrying the voices, or a ``success=False`` payload
        when the lookup yields nothing for that category.

    Raises:
        HTTPException: Status 500 when the lookup raises.
    """
    try:
        found = tts_instance.get_voices_by_category(category)
    except Exception as e:
        logger.error(f"获取分类 '{category}' 下的音色错误: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"获取分类 '{category}' 下的音色失败: {str(e)}"
        )

    # Guard clause: an empty result is reported as a soft failure, not an error.
    if not found:
        return {
            "success": False,
            "message": f"未找到分类 '{category}' 下的音色",
        }

    return {
        "success": True,
        "data": found,
        "message": f"获取分类 '{category}' 下的音色成功",
    }
|
||||
|
||||
|
||||
@router.get("/voices/all")
async def get_all_voices():
    """Return every voice category together with its voices.

    Returns:
        ``{"success": True, "data": {...}, "message": ...}`` with the full
        catalogue reported by the shared TTS instance.

    Raises:
        HTTPException: Status 500 when the lookup raises.
    """
    try:
        catalogue = tts_instance.get_all_voices()
    except Exception as e:
        logger.error(f"获取所有音色分类和列表错误: {e}")
        raise HTTPException(
            status_code=500,
            detail=f"获取所有音色分类和列表失败: {str(e)}"
        )

    return {
        "success": True,
        "data": catalogue,
        "message": "获取所有音色分类和列表成功",
    }
|
||||
|
||||
|
||||
@router.post("/generate", response_model=TextToSpeechResponse)
async def generate_audio(request: TextToSpeechRequest):
    """Synthesize speech for the given text and return it as a playable URL.

    ``tts_instance.generate_audio`` yields raw audio bytes (or ``None`` on
    failure), not a URL. The bytes are therefore embedded in a ``data:`` URL,
    which a client can use directly as an ``<audio>`` source or download link.

    Args:
        request: Text plus optional voice, ratio and encoding settings.

    Returns:
        A ``TextToSpeechResponse``; on success ``audio_url`` holds the data
        URL and ``audio_size`` the payload size in KB.

    Raises:
        HTTPException: Status 500 when synthesis (or building the response)
            raises.
    """
    # Local imports keep this handler independent of the file-level imports.
    import base64

    from fastapi.concurrency import run_in_threadpool

    # Encodings whose MIME subtype differs from the encoding name.
    mime_subtypes = {"mp3": "mpeg"}

    try:
        # An explicit null in the request body would otherwise reach the backend.
        encoding = request.encoding or "mp3"

        # The TTS wrapper performs a blocking HTTP request; run it off the
        # event loop so other requests keep being served meanwhile.
        audio = await run_in_threadpool(
            tts_instance.generate_audio,
            text=request.text,
            voice_type=request.voice_type,
            speed_ratio=request.speed_ratio,
            volume_ratio=request.volume_ratio,
            pitch_ratio=request.pitch_ratio,
            encoding=encoding,
        )

        if not audio:
            return TextToSpeechResponse(success=False, message="音频生成失败")

        if isinstance(audio, (bytes, bytearray)):
            subtype = mime_subtypes.get(encoding, encoding)
            payload = base64.b64encode(audio).decode("ascii")
            audio_url = f"data:audio/{subtype};base64,{payload}"
            audio_size = len(audio) / 1024
        else:
            # Tolerate a backend that already hands back a URL string.
            audio_url = str(audio)
            audio_size = None

        return TextToSpeechResponse(
            success=True,
            message="音频生成成功",
            audio_url=audio_url,
            audio_size=audio_size,
            audio_format=encoding,
            request_id=str(uuid.uuid4()),
        )
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception("文本转语音接口错误: %s", e)
        raise HTTPException(
            status_code=500,
            detail=f"音频生成失败: {str(e)}",
        ) from e
|
@@ -27,7 +27,8 @@ from Routes.TeachingModel.tasks.BackgroundTasks import train_document_task
|
||||
from Routes.XueBanRoute import router as xueban_router
|
||||
from Routes.ZuoWen import router as zuowen_router
|
||||
from Routes.RecognizeEduQuestion import router as ocr_router
|
||||
|
||||
from Routes.VideoRetalkRoute import router as videoRetalk_router
|
||||
from Routes.ttsRoute import router as tts_router
|
||||
# 控制日志输出
|
||||
logger = logging.getLogger('lightrag')
|
||||
logger.setLevel(logging.INFO)
|
||||
@@ -80,6 +81,10 @@ app.include_router(mj_router) # Midjourney路由
|
||||
app.include_router(qwen_image_router) # Qwen Image 路由
|
||||
app.include_router(ocr_router) # 教育场景识别
|
||||
|
||||
app.include_router(videoRetalk_router) # 视频复读
|
||||
app.include_router(tts_router) # 文本转语音
|
||||
|
||||
|
||||
# Teaching Model 相关路由
|
||||
# 登录相关(不用登录)
|
||||
app.include_router(login_router, prefix="/api/login", tags=["login"])
|
||||
|
@@ -1,73 +0,0 @@
|
||||
#coding=utf-8
|
||||
|
||||
'''
|
||||
requires Python 3.6 or later
|
||||
pip install requests
|
||||
'''
|
||||
import base64
|
||||
import json
|
||||
import uuid
|
||||
import requests
|
||||
|
||||
from Config.Config import HS_APP_ID, HS_ACCESS_TOKEN, HS_CLUSTER_ID, HS_VOICE_TYPE_QINCANG
|
||||
|
||||
# 填写平台申请的appid, access_token以及cluster
|
||||
appid = HS_APP_ID
|
||||
access_token= HS_ACCESS_TOKEN
|
||||
cluster = HS_CLUSTER_ID
|
||||
|
||||
voice_type = HS_VOICE_TYPE_QINCANG
|
||||
host = "openspeech.bytedance.com"
|
||||
api_url = f"https://{host}/api/v1/tts"
|
||||
|
||||
header = {"Authorization": f"Bearer;{access_token}"}
|
||||
|
||||
request_json = {
|
||||
"app": {
|
||||
"appid": appid,
|
||||
"token": "access_token",
|
||||
"cluster": cluster
|
||||
},
|
||||
"user": {
|
||||
"uid": "388808087185088"
|
||||
},
|
||||
"audio": {
|
||||
"voice_type": voice_type,
|
||||
"encoding": "mp3",
|
||||
"speed_ratio": 1.0,
|
||||
"volume_ratio": 1.0,
|
||||
"pitch_ratio": 1.0,
|
||||
},
|
||||
"request": {
|
||||
"reqid": str(uuid.uuid4()),
|
||||
"text": """
|
||||
君不见,黄河之水天上来,奔流到海不复回。
|
||||
君不见,高堂明镜悲白发,朝如青丝暮成雪。
|
||||
人生得意须尽欢,莫使金樽空对月。
|
||||
天生我材必有用,千金散尽还复来。
|
||||
烹羊宰牛且为乐,会须一饮三百杯。
|
||||
岑夫子,丹丘生,将进酒,杯莫停。
|
||||
与君歌一曲,请君为我倾耳听。
|
||||
钟鼓馔玉不足贵,但愿长醉不复醒。
|
||||
古来圣贤皆寂寞,惟有饮者留其名。
|
||||
陈王昔时宴平乐,斗酒十千恣欢谑。
|
||||
主人何为言少钱,径须沽取对君酌。
|
||||
五花马,千金裘,呼儿将出换美酒,与尔同销万古愁。
|
||||
""",
|
||||
"text_type": "plain",
|
||||
"operation": "query",
|
||||
"with_frontend": 1,
|
||||
"frontend_type": "unitTson"
|
||||
}
|
||||
}
|
||||
|
||||
if __name__ == '__main__':
    import traceback

    try:
        resp = requests.post(api_url, json.dumps(request_json), headers=header)
        # Parse the body once instead of calling resp.json() repeatedly.
        resp_body = resp.json()
        if "data" in resp_body:
            # The context manager guarantees the file is flushed and closed.
            with open("test_submit.mp3", "wb") as file_to_save:
                file_to_save.write(base64.b64decode(resp_body["data"]))
    except Exception:
        # e.with_traceback() needs a traceback argument and raised TypeError
        # when called bare; print the active traceback instead.
        traceback.print_exc()
|
284
dsLightRag/Util/GengerateAudio.py
Normal file
284
dsLightRag/Util/GengerateAudio.py
Normal file
@@ -0,0 +1,284 @@
|
||||
#coding=utf-8
|
||||
|
||||
'''
|
||||
字节跳动语音合成API封装类
|
||||
requires Python 3.6 or later
|
||||
pip install requests
|
||||
'''
|
||||
import base64
|
||||
import json
|
||||
import uuid
|
||||
import requests
|
||||
from typing import Optional, Dict, Any
|
||||
from pathlib import Path
|
||||
|
||||
from Config.Config import HS_APP_ID, HS_ACCESS_TOKEN, HS_CLUSTER_ID, HS_VOICE_TYPE_QINCANG
|
||||
|
||||
|
||||
# ByteDance TTS wrapper; the voice catalogue is the TTS_VOICES class attribute.
|
||||
|
||||
class ByteDanceTTS:
    """Wrapper around the ByteDance speech-synthesis HTTP API.

    Provides text-to-speech generation plus lookups over a built-in voice
    catalogue.
    """

    # Voice catalogue: category name -> {voice_type id: display description}.
    TTS_VOICES = {
        "通用场景": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        },
        "有声阅读": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)",
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)"
        },
        "智能助手": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        },
        "视频配音": {
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)",
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        },
        "特色音色": {
            "zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
            "zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)"
        },
        "广告配音": {
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)"
        },
        "新闻播报": {
            "zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)",
            "zh_male_xiaofeng_common": "小峰(男声,沉稳大气)"
        },
        "教育场景": {
            "zh_female_xiaoxin_common": "小新(女声,自然流畅)",
            "zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
        }
    }

    def __init__(self,
                 app_id: Optional[str] = None,
                 access_token: Optional[str] = None,
                 cluster_id: Optional[str] = None,
                 voice_type: Optional[str] = None):
        """Store credentials and derive the endpoint URL and auth header.

        Args:
            app_id: Application ID; falls back to ``HS_APP_ID``.
            access_token: Access token; falls back to ``HS_ACCESS_TOKEN``.
            cluster_id: Cluster ID; falls back to ``HS_CLUSTER_ID``.
            voice_type: Default voice; falls back to ``HS_VOICE_TYPE_QINCANG``.
        """
        self.app_id = app_id or HS_APP_ID
        self.access_token = access_token or HS_ACCESS_TOKEN
        self.cluster_id = cluster_id or HS_CLUSTER_ID
        self.voice_type = voice_type or HS_VOICE_TYPE_QINCANG

        self.host = "openspeech.bytedance.com"
        self.api_url = f"https://{self.host}/api/v1/tts"
        # NOTE(review): "Bearer;" with a semicolon is kept exactly as written —
        # confirm against the API documentation.
        self.header = {"Authorization": f"Bearer;{self.access_token}"}

    def generate_audio(self,
                       text: str,
                       output_path: Optional[str] = None,
                       voice_type: Optional[str] = None,
                       encoding: str = "mp3",
                       speed_ratio: float = 1.0,
                       volume_ratio: float = 1.0,
                       pitch_ratio: float = 1.0,
                       text_type: str = "plain",
                       operation: str = "query",
                       timeout: float = 30.0) -> Optional[bytes]:
        """Synthesize ``text`` and return the audio bytes.

        Args:
            text: Text to convert.
            output_path: When given, the audio is also saved to this path.
            voice_type: Voice to use; overrides the instance default.
            encoding: Audio encoding, ``"mp3"`` by default.
            speed_ratio: Speech speed ratio.
            volume_ratio: Volume ratio.
            pitch_ratio: Pitch ratio.
            text_type: Text type sent to the API.
            operation: Operation type sent to the API.
            timeout: Seconds to wait for the HTTP request before giving up.

        Returns:
            The decoded audio bytes, or ``None`` when the text is empty, the
            request fails, or the response carries no ``data`` field.
        """
        # Nothing to synthesize: skip the network round trip entirely.
        if not text or not text.strip():
            print("生成音频失败: 文本内容为空")
            return None

        request_json = {
            "app": {
                "appid": self.app_id,
                # NOTE(review): literal placeholder string, not
                # self.access_token; the token travels in the Authorization
                # header — confirm against the API documentation.
                "token": "access_token",
                "cluster": self.cluster_id
            },
            "user": {
                "uid": str(uuid.uuid4())  # random user ID per request
            },
            "audio": {
                "voice_type": voice_type or self.voice_type,
                "encoding": encoding,
                "speed_ratio": speed_ratio,
                "volume_ratio": volume_ratio,
                "pitch_ratio": pitch_ratio,
            },
            "request": {
                "reqid": str(uuid.uuid4()),
                "text": text,
                "text_type": text_type,
                "operation": operation,
                "with_frontend": 1,
                "frontend_type": "unitTson"
            }
        }

        try:
            # Without a timeout an unresponsive server would block forever.
            resp = requests.post(
                self.api_url,
                json.dumps(request_json),
                headers=self.header,
                timeout=timeout,
            )
            resp.raise_for_status()

            resp_data = resp.json()

            if "data" not in resp_data:
                print(f"API响应中未包含音频数据: {resp_data}")
                return None

            audio_data = base64.b64decode(resp_data["data"])

            if output_path:
                self.save_audio(audio_data, output_path)

            return audio_data

        except requests.exceptions.RequestException as e:
            print(f"请求失败: {e}")
            return None
        except Exception as e:
            print(f"生成音频失败: {e}")
            return None

    def save_audio(self, audio_data: bytes, output_path: str) -> bool:
        """Write audio bytes to ``output_path``, creating parent directories.

        Args:
            audio_data: Audio bytes to write.
            output_path: Destination file path.

        Returns:
            ``True`` when the file was written, ``False`` on any error.
        """
        try:
            Path(output_path).parent.mkdir(parents=True, exist_ok=True)

            with open(output_path, "wb") as f:
                f.write(audio_data)

            print(f"音频已保存到: {output_path}")
            return True

        except Exception as e:
            print(f"保存音频失败: {e}")
            return False

    def get_audio_info(self, audio_data: bytes) -> Dict[str, Any]:
        """Describe an audio payload.

        Args:
            audio_data: Audio bytes.

        Returns:
            Dict with ``size_bytes``, ``size_kb`` and ``format``.
        """
        return {
            "size_bytes": len(audio_data),
            "size_kb": len(audio_data) / 1024,
            "format": "mp3"  # currently always reported as mp3
        }

    def get_voices_by_category(self, category: str) -> Dict[str, str]:
        """Return the voices of one category.

        Args:
            category: Category name.

        Returns:
            Mapping of voice_type to description; empty when the category is
            unknown. A copy is returned so callers cannot alter the catalogue.
        """
        return dict(self.TTS_VOICES.get(category, {}))

    def get_all_categories(self) -> list:
        """Return the names of all voice categories."""
        return list(self.TTS_VOICES.keys())

    def get_all_voices(self) -> Dict[str, Dict[str, str]]:
        """Return the whole catalogue: category -> {voice_type: description}.

        The result is a copy (including the per-category mappings), so callers
        cannot alter the shared class-level catalogue.
        """
        return {
            category: dict(voices)
            for category, voices in self.TTS_VOICES.items()
        }
|
||||
|
||||
|
||||
def main():
    """Example usage: synthesize a sample poem and save it to test_submit.mp3."""
    # Sample text to convert.
    sample_text = """
君不见,黄河之水天上来,奔流到海不复回。
君不见,高堂明镜悲白发,朝如青丝暮成雪。
人生得意须尽欢,莫使金樽空对月。
天生我材必有用,千金散尽还复来。
烹羊宰牛且为乐,会须一饮三百杯。
岑夫子,丹丘生,将进酒,杯莫停。
与君歌一曲,请君为我倾耳听。
钟鼓馔玉不足贵,但愿长醉不复醒。
古来圣贤皆寂寞,惟有饮者留其名。
陈王昔时宴平乐,斗酒十千恣欢谑。
主人何为言少钱,径须沽取对君酌。
五花马,千金裘,呼儿将出换美酒,与尔同销万古愁。
"""

    synthesizer = ByteDanceTTS()

    # Generate the audio and save it in one call.
    audio_bytes = synthesizer.generate_audio(
        text=sample_text,
        output_path="test_submit.mp3",
        voice_type=HS_VOICE_TYPE_QINCANG,
        speed_ratio=1.0,
        volume_ratio=1.0,
    )

    if not audio_bytes:
        print("音频生成失败")
        return

    details = synthesizer.get_audio_info(audio_bytes)
    print(f"音频生成成功,大小: {details['size_kb']:.2f} KB")


if __name__ == '__main__':
    main()
|
BIN
dsLightRag/Util/__pycache__/GengerateAudio.cpython-310.pyc
Normal file
BIN
dsLightRag/Util/__pycache__/GengerateAudio.cpython-310.pyc
Normal file
Binary file not shown.
BIN
dsLightRag/Util/__pycache__/VideoRetalk.cpython-310.pyc
Normal file
BIN
dsLightRag/Util/__pycache__/VideoRetalk.cpython-310.pyc
Normal file
Binary file not shown.
539
dsLightRag/static/text-to-speech.html
Normal file
539
dsLightRag/static/text-to-speech.html
Normal file
@@ -0,0 +1,539 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>文本转语音</title>
|
||||
<style>
|
||||
* {
|
||||
margin: 0; padding: 0; box-sizing: border-box;
|
||||
font-family: 'PingFang SC', 'Microsoft YaHei', sans-serif;
|
||||
}
|
||||
body {
|
||||
background-color: #f5f7fa;
|
||||
color: #333;
|
||||
line-height: 1.6;
|
||||
}
|
||||
.container {
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
}
|
||||
header {
|
||||
text-align: center;
|
||||
padding: 40px 20px;
|
||||
background: linear-gradient(135deg, #3498db, #8e44ad);
|
||||
color: white;
|
||||
border-radius: 10px;
|
||||
margin-bottom: 30px;
|
||||
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
|
||||
}
|
||||
h1 {
|
||||
font-size: 2.2rem;
|
||||
margin-bottom: 10px;
|
||||
font-weight: 700;
|
||||
}
|
||||
.subtitle {
|
||||
font-size: 1.1rem;
|
||||
opacity: 0.9;
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
.main-content {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
gap: 30px;
|
||||
margin-bottom: 30px;
|
||||
}
|
||||
|
||||
.form-section {
|
||||
flex: 1;
|
||||
min-width: 300px;
|
||||
background-color: white;
|
||||
border-radius: 10px;
|
||||
padding: 25px;
|
||||
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
|
||||
}
|
||||
|
||||
.result-section {
|
||||
flex: 1;
|
||||
min-width: 300px;
|
||||
background-color: white;
|
||||
border-radius: 10px;
|
||||
padding: 25px;
|
||||
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
|
||||
}
|
||||
|
||||
.form-group {
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
label {
|
||||
display: block;
|
||||
margin-bottom: 8px;
|
||||
font-weight: 500;
|
||||
color: #2c3e50;
|
||||
}
|
||||
|
||||
select, textarea {
|
||||
width: 100%;
|
||||
padding: 12px;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 6px;
|
||||
font-size: 16px;
|
||||
transition: border 0.3s;
|
||||
}
|
||||
|
||||
select:focus, textarea:focus {
|
||||
border-color: #3498db;
|
||||
outline: none;
|
||||
}
|
||||
|
||||
textarea {
|
||||
min-height: 150px;
|
||||
resize: vertical;
|
||||
}
|
||||
|
||||
.btn {
|
||||
padding: 12px 24px;
|
||||
border: none;
|
||||
border-radius: 6px;
|
||||
cursor: pointer;
|
||||
font-size: 16px;
|
||||
transition: background 0.3s;
|
||||
display: inline-block;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.btn-primary {
|
||||
background-color: #3498db;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.btn-primary:hover {
|
||||
background-color: #2980b9;
|
||||
}
|
||||
|
||||
.btn:disabled {
|
||||
background-color: #95a5a6;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
|
||||
.audio-player {
|
||||
margin-top: 20px;
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
.loading {
|
||||
display: none;
|
||||
text-align: center;
|
||||
margin: 20px 0;
|
||||
}
|
||||
|
||||
.loading.active {
|
||||
display: block;
|
||||
}
|
||||
|
||||
.spinner {
|
||||
border: 4px solid rgba(0, 0, 0, 0.1);
|
||||
border-radius: 50%;
|
||||
border-top: 4px solid #3498db;
|
||||
width: 40px;
|
||||
height: 40px;
|
||||
animation: spin 1s linear infinite;
|
||||
margin: 0 auto 15px;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
0% { transform: rotate(0deg); }
|
||||
100% { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
.voice-options {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
||||
gap: 15px;
|
||||
margin-top: 15px;
|
||||
min-height: 100px;
|
||||
}
|
||||
|
||||
.voice-card {
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 8px;
|
||||
padding: 15px;
|
||||
cursor: pointer;
|
||||
transition: all 0.3s;
|
||||
}
|
||||
|
||||
.voice-options p {
|
||||
color: #666;
|
||||
text-align: center;
|
||||
padding: 20px;
|
||||
}
|
||||
|
||||
.voice-card:hover {
|
||||
border-color: #3498db;
|
||||
box-shadow: 0 2px 8px rgba(52, 152, 219, 0.2);
|
||||
}
|
||||
|
||||
.voice-card.selected {
|
||||
border-color: #3498db;
|
||||
background-color: #e8f4fd;
|
||||
}
|
||||
|
||||
.voice-name {
|
||||
font-weight: 500;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
|
||||
.voice-description {
|
||||
font-size: 0.9rem;
|
||||
color: #666;
|
||||
}
|
||||
|
||||
.error-message {
|
||||
color: #e74c3c;
|
||||
margin-top: 10px;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.success-message {
|
||||
color: #2ecc71;
|
||||
margin-top: 10px;
|
||||
display: none;
|
||||
}
|
||||
|
||||
@media screen and (max-width: 768px) {
|
||||
.main-content {
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 1.8rem;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<header>
|
||||
<h1>文本转语音</h1>
|
||||
<p class="subtitle">选择音色,输入文本,生成高质量语音</p>
|
||||
</header>
|
||||
|
||||
<div class="main-content">
|
||||
<div class="form-section">
|
||||
<h2>语音设置</h2>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="category-select">音色分类</label>
|
||||
<select id="category-select">
|
||||
<option value="">请选择音色分类</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label>选择音色</label>
|
||||
<div id="voice-options" class="voice-options">
|
||||
<p>请先选择音色分类</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="text-input">输入文本</label>
|
||||
<textarea id="text-input" placeholder="请输入要转换为语音的文本..."></textarea>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="speed-ratio">语速</label>
|
||||
<select id="speed-ratio">
|
||||
<option value="0.8">较慢</option>
|
||||
<option value="1.0" selected>正常</option>
|
||||
<option value="1.2">较快</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="volume-ratio">音量</label>
|
||||
<select id="volume-ratio">
|
||||
<option value="0.8">较小</option>
|
||||
<option value="1.0" selected>正常</option>
|
||||
<option value="1.2">较大</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="form-group">
|
||||
<label for="pitch-ratio">音调</label>
|
||||
<select id="pitch-ratio">
|
||||
<option value="0.8">较低</option>
|
||||
<option value="1.0" selected>正常</option>
|
||||
<option value="1.2">较高</option>
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<button id="generate-btn" class="btn btn-primary">生成语音</button>
|
||||
|
||||
<div id="error-message" class="error-message"></div>
|
||||
<div id="success-message" class="success-message"></div>
|
||||
</div>
|
||||
|
||||
<div class="result-section">
|
||||
<h2>生成结果</h2>
|
||||
|
||||
<div id="loading" class="loading">
|
||||
<div class="spinner"></div>
|
||||
<p>正在生成语音,请稍候...</p>
|
||||
</div>
|
||||
|
||||
<div id="audio-result" style="display: none;">
|
||||
<audio id="audio-player" class="audio-player" controls></audio>
|
||||
<div class="form-group" style="margin-top: 20px;">
|
||||
<button id="download-btn" class="btn btn-primary">下载音频</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="empty-result" style="text-align: center; padding: 40px 0; color: #999;">
|
||||
<p>暂无生成结果</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
// 获取DOM元素
|
||||
const categorySelect = document.getElementById('category-select');
|
||||
const voiceOptions = document.getElementById('voice-options');
|
||||
const textInput = document.getElementById('text-input');
|
||||
const speedRatio = document.getElementById('speed-ratio');
|
||||
const volumeRatio = document.getElementById('volume-ratio');
|
||||
const pitchRatio = document.getElementById('pitch-ratio');
|
||||
const generateBtn = document.getElementById('generate-btn');
|
||||
const loading = document.getElementById('loading');
|
||||
const audioResult = document.getElementById('audio-result');
|
||||
const audioPlayer = document.getElementById('audio-player');
|
||||
const downloadBtn = document.getElementById('download-btn');
|
||||
const emptyResult = document.getElementById('empty-result');
|
||||
const errorMessage = document.getElementById('error-message');
|
||||
const successMessage = document.getElementById('success-message');
|
||||
|
||||
// 当前选中的音色
|
||||
let selectedVoiceType = null;
|
||||
|
||||
// Base URL of the TTS API. It must match the backend router prefix
// ("/api/tts"); "/api/VideoRetalk" is not a mounted route.
const apiBaseUrl = '/api/tts';
|
||||
|
||||
// 获取所有音色分类
|
||||
// Fetch the voice categories and fill the category <select>.
async function loadVoiceCategories() {
    try {
        const response = await fetch(`${apiBaseUrl}/voices/categories`);
        const data = await response.json();

        // HTTP errors from the backend carry their text in "detail" rather
        // than "message"; fall back so the user never sees "undefined".
        if (!response.ok || !data.success) {
            const reason = data.message || data.detail || response.statusText;
            showError('获取音色分类失败: ' + reason);
            return;
        }

        // Reset to the placeholder option, then append one option per category.
        categorySelect.innerHTML = '<option value="">请选择音色分类</option>';
        data.data.forEach(category => {
            const option = document.createElement('option');
            option.value = category;
            option.textContent = category;
            categorySelect.appendChild(option);
        });
    } catch (error) {
        showError('获取音色分类失败: ' + error.message);
    }
}
|
||||
|
||||
// 根据分类获取音色列表
|
||||
// Fetch the voices of one category and render them as selectable cards.
async function loadVoicesByCategory(category) {
    // Show a single <p> notice in the voice list. textContent is used so that
    // server-provided text is never parsed as HTML.
    function renderNotice(text) {
        const notice = document.createElement('p');
        notice.textContent = text;
        voiceOptions.innerHTML = '';
        voiceOptions.appendChild(notice);
    }

    // The previous category's cards are about to disappear; drop the old
    // selection so an invisible voice cannot be submitted.
    selectedVoiceType = null;

    try {
        // Encode the category so characters like "/" or "?" stay in the path segment.
        const response = await fetch(
            `${apiBaseUrl}/voices/by-category/${encodeURIComponent(category)}`
        );
        const data = await response.json();

        if (!response.ok || !data.success) {
            const reason = data.message || data.detail || response.statusText;
            renderNotice('获取音色列表失败: ' + reason);
            return;
        }

        voiceOptions.innerHTML = '';

        if (typeof data.data !== 'object' || data.data === null) {
            showError('获取的音色列表格式不正确');
            return;
        }

        // Convert {voice_type: "name(description)"} into an array of records.
        // Both ASCII and full-width parentheses are accepted.
        const voicesArray = Object.entries(data.data).map(([voiceType, rawDescription]) => {
            const description = String(rawDescription);
            const match = description.match(/^(.*?)[((](.*?)[))]$/);
            return {
                voice_type: voiceType,
                name: match ? match[1] : description,
                description: match ? match[2] : '无描述'
            };
        });

        if (voicesArray.length === 0) {
            renderNotice('该分类下没有可用音色');
            return;
        }

        voicesArray.forEach(voice => {
            const voiceCard = document.createElement('div');
            voiceCard.className = 'voice-card';
            voiceCard.dataset.voiceType = voice.voice_type;

            const voiceName = document.createElement('div');
            voiceName.className = 'voice-name';
            voiceName.textContent = voice.name;

            const voiceDescription = document.createElement('div');
            voiceDescription.className = 'voice-description';
            voiceDescription.textContent = voice.description || '暂无描述';

            voiceCard.appendChild(voiceName);
            voiceCard.appendChild(voiceDescription);

            // Clicking a card makes it the only selected one and records its voice type.
            voiceCard.addEventListener('click', function() {
                document.querySelectorAll('.voice-card').forEach(card => {
                    card.classList.remove('selected');
                });
                this.classList.add('selected');
                selectedVoiceType = this.dataset.voiceType;
            });

            voiceOptions.appendChild(voiceCard);
        });
    } catch (error) {
        renderNotice('获取音色列表失败: ' + error.message);
    }
}
|
||||
|
||||
// 生成语音
|
||||
// Validate the form, request speech synthesis and show the result.
async function generateAudio() {
    // The synthesis route is mounted at /api/tts/generate on the backend.
    // It is spelled out in full so this call does not depend on apiBaseUrl.
    const generateEndpoint = '/api/tts/generate';

    if (!selectedVoiceType) {
        showError('请选择音色');
        return;
    }

    const text = textInput.value.trim();
    if (!text) {
        showError('请输入要转换的文本');
        return;
    }

    hideMessages();

    // Enter the loading state: hide results and block repeated submissions.
    loading.classList.add('active');
    audioResult.style.display = 'none';
    emptyResult.style.display = 'none';
    generateBtn.disabled = true;

    try {
        const requestData = {
            text: text,
            voice_type: selectedVoiceType,
            speed_ratio: parseFloat(speedRatio.value),
            volume_ratio: parseFloat(volumeRatio.value),
            pitch_ratio: parseFloat(pitchRatio.value),
            encoding: 'mp3'
        };

        const response = await fetch(generateEndpoint, {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(requestData)
        });

        const data = await response.json();

        // HTTP errors carry their text in "detail" rather than "message".
        if (!response.ok || !data.success) {
            const reason = data.message || data.detail || response.statusText;
            showError('语音生成失败: ' + reason);
            emptyResult.style.display = 'block';
            return;
        }

        showSuccess('语音生成成功');

        audioPlayer.src = data.audio_url;
        audioResult.style.display = 'block';

        // Download through a temporary link pointing at the same audio URL.
        downloadBtn.onclick = function() {
            const a = document.createElement('a');
            a.href = data.audio_url;
            a.download = 'tts_audio.mp3';
            document.body.appendChild(a);
            a.click();
            document.body.removeChild(a);
        };
    } catch (error) {
        showError('语音生成失败: ' + error.message);
        emptyResult.style.display = 'block';
    } finally {
        // Leave the loading state whatever the outcome (also after early return).
        loading.classList.remove('active');
        generateBtn.disabled = false;
    }
}
|
||||
|
||||
// 显示错误消息
|
||||
// Show the error banner with the given text and hide the success banner.
function showError(message) {
    successMessage.style.display = 'none';
    errorMessage.textContent = message;
    errorMessage.style.display = 'block';
}
|
||||
|
||||
// 显示成功消息
|
||||
// Show the success banner with the given text and hide the error banner.
function showSuccess(message) {
    errorMessage.style.display = 'none';
    successMessage.textContent = message;
    successMessage.style.display = 'block';
}
|
||||
|
||||
// 隐藏所有消息
|
||||
// Hide both the error and the success banner.
function hideMessages() {
    for (const banner of [errorMessage, successMessage]) {
        banner.style.display = 'none';
    }
}
|
||||
|
||||
// 事件监听器
|
||||
// Changing the category reloads the voice list, or resets it when cleared.
categorySelect.addEventListener('change', function() {
    const chosen = this.value;
    if (!chosen) {
        voiceOptions.innerHTML = '<p>请先选择音色分类</p>';
        selectedVoiceType = null;
        return;
    }
    loadVoicesByCategory(chosen);
});

generateBtn.addEventListener('click', generateAudio);

// Initial load of the category list.
loadVoiceCategories();
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
Reference in New Issue
Block a user