This commit is contained in:
2025-09-02 06:55:13 +08:00
parent 4078acb909
commit 1b959b3ba9
11 changed files with 1106 additions and 74 deletions


@@ -0,0 +1,132 @@
import datetime
import logging
import uuid
from typing import Optional
from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from Config import Config
from Util.VideoRetalk import VideoRetalk
# Create the video generation router
router = APIRouter(prefix="/api/video", tags=["视频生成"])
# Configure logging
logger = logging.getLogger(__name__)
# Only video-related model definitions are kept in this module
class VideoRetalkRequest(BaseModel):
"""视频生成请求参数"""
image_url: str
audio_url: str
template_id: Optional[str] = "normal"
eye_move_freq: Optional[float] = 0.5
video_fps: Optional[int] = 30
mouth_move_strength: Optional[float] = 1.0
paste_back: Optional[bool] = True
head_move_strength: Optional[float] = 0.7
class VideoRetalkResponse(BaseModel):
"""视频生成响应"""
success: bool
message: str
task_id: Optional[str] = None
video_url: Optional[str] = None
video_duration: Optional[float] = None
video_ratio: Optional[str] = None
request_id: Optional[str] = None
@router.post("/generate", response_model=VideoRetalkResponse)
async def generate_video(request: VideoRetalkRequest):
"""
Generate a talking-portrait video.
Given a portrait image and an audio clip, produce a video whose lip movements match the audio.
"""
try:
# Initialize the VideoRetalk instance
video_retalk = VideoRetalk(Config.ALY_LLM_API_KEY)
# Call the video generation method
video_url = video_retalk.generate_video(
image_url=request.image_url,
audio_url=request.audio_url,
template_id=request.template_id,
eye_move_freq=request.eye_move_freq,
video_fps=request.video_fps,
mouth_move_strength=request.mouth_move_strength,
paste_back=request.paste_back,
head_move_strength=request.head_move_strength
)
if video_url:
return VideoRetalkResponse(
success=True,
message="视频生成成功",
video_url=video_url,
# In a real implementation the fields below should be taken from the API response
task_id=str(uuid.uuid4()),
video_duration=10.23, # placeholder; should come from the API response
video_ratio="standard", # placeholder; should come from the API response
request_id=str(uuid.uuid4())
)
else:
return VideoRetalkResponse(
success=False,
message="视频生成失败"
)
except Exception as e:
logger.error(f"视频生成接口错误: {e}")
raise HTTPException(
status_code=500,
detail=f"视频生成失败: {str(e)}"
)
@router.get("/task/status")
async def get_task_status(task_id: str = Query(..., description="任务ID")):
"""
Query the status of a video generation task.
"""
try:
video_retalk = VideoRetalk(Config.ALY_LLM_API_KEY)
task_status = video_retalk.get_task_status(task_id)
return {
"success": True,
"data": task_status
}
except Exception as e:
logger.error(f"查询任务状态错误: {e}")
raise HTTPException(
status_code=500,
detail=f"查询任务状态失败: {str(e)}"
)
@router.get("/health")
async def health_check():
"""
Health check endpoint.
"""
return {
"status": "healthy",
"timestamp": datetime.datetime.now().isoformat(),
"service": "VideoRetalk API"
}
# Global exception handling (kept)
def global_exception_handler(request: Request, exc: Exception):
logger.error(f"全局异常: {exc}")
return JSONResponse(
status_code=500,
content={"success": False, "message": f"服务器内部错误: {str(exc)}"}
)
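For reference, a minimal client sketch for the new route (not part of this commit; the host, port and input URLs below are assumptions): it posts a portrait image URL and an audio URL to /api/video/generate and prints the returned video URL.

import requests

BASE_URL = "http://127.0.0.1:8000"  # hypothetical local deployment

payload = {
    "image_url": "https://example.com/portrait.png",  # hypothetical input image
    "audio_url": "https://example.com/speech.mp3",    # hypothetical input audio
    "template_id": "normal",
    "video_fps": 30
}

# Video generation is slow, so allow a generous timeout
resp = requests.post(f"{BASE_URL}/api/video/generate", json=payload, timeout=600)
resp.raise_for_status()
data = resp.json()
print(data["video_url"] if data["success"] else data["message"])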

Binary file not shown.


@@ -0,0 +1,145 @@
import base64
import logging
import uuid
from typing import Optional
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Util.GengerateAudio import ByteDanceTTS
# Create the text-to-speech router
router = APIRouter(prefix="/api/tts", tags=["声音生成"])
# Configure logging
logger = logging.getLogger(__name__)
# Initialize a module-level TTS instance
tts_instance = ByteDanceTTS()
class TextToSpeechRequest(BaseModel):
"""文本转语音请求参数"""
text: str
voice_type: Optional[str] = None
speed_ratio: Optional[float] = 1.0
volume_ratio: Optional[float] = 1.0
pitch_ratio: Optional[float] = 1.0
encoding: Optional[str] = "mp3"
class TextToSpeechResponse(BaseModel):
"""文本转语音响应"""
success: bool
message: str
audio_url: Optional[str] = None
audio_size: Optional[float] = None
audio_format: Optional[str] = None
request_id: Optional[str] = None
@router.get("/voices/categories")
async def get_voice_categories():
"""
List all voice categories.
Returns every available voice category.
"""
try:
categories = tts_instance.get_all_categories()
return {
"success": True,
"data": categories,
"message": "获取音色分类成功"
}
except Exception as e:
logger.error(f"获取音色分类错误: {e}")
raise HTTPException(
status_code=500,
detail=f"获取音色分类失败: {str(e)}"
)
@router.get("/voices/by-category/{category}")
async def get_voices_by_category(category: str):
"""
List the voices in a category.
Args:
category: voice category name
Returns all voices under the given category.
"""
try:
voices = tts_instance.get_voices_by_category(category)
if not voices:
return {
"success": False,
"message": f"未找到分类 '{category}' 下的音色"
}
return {
"success": True,
"data": voices,
"message": f"获取分类 '{category}' 下的音色成功"
}
except Exception as e:
logger.error(f"获取分类 '{category}' 下的音色错误: {e}")
raise HTTPException(
status_code=500,
detail=f"获取分类 '{category}' 下的音色失败: {str(e)}"
)
@router.get("/voices/all")
async def get_all_voices():
"""
List all voice categories together with the voices in each category.
"""
try:
all_voices = tts_instance.get_all_voices()
return {
"success": True,
"data": all_voices,
"message": "获取所有音色分类和列表成功"
}
except Exception as e:
logger.error(f"获取所有音色分类和列表错误: {e}")
raise HTTPException(
status_code=500,
detail=f"获取所有音色分类和列表失败: {str(e)}"
)
@router.post("/generate", response_model=TextToSpeechResponse)
async def generate_audio(request: TextToSpeechRequest):
"""
Text-to-speech endpoint.
Generate audio from the input text and voice parameters.
"""
try:
# Call the TTS helper; ByteDanceTTS.generate_audio returns raw audio bytes (or None on failure)
audio_data = tts_instance.generate_audio(
text=request.text,
voice_type=request.voice_type,
speed_ratio=request.speed_ratio,
volume_ratio=request.volume_ratio,
pitch_ratio=request.pitch_ratio,
encoding=request.encoding
)
if audio_data:
# The helper returns bytes, not a URL; embed them as a data URL so the JSON
# response stays serializable and the page can play/download it directly.
audio_url = f"data:audio/{request.encoding};base64,{base64.b64encode(audio_data).decode()}"
return TextToSpeechResponse(
success=True,
message="音频生成成功",
audio_url=audio_url,
audio_size=round(len(audio_data) / 1024, 2),  # size in KB
audio_format=request.encoding,
request_id=str(uuid.uuid4())
)
else:
return TextToSpeechResponse(
success=False,
message="音频生成失败"
)
except Exception as e:
logger.error(f"文本转语音接口错误: {e}")
raise HTTPException(
status_code=500,
detail=f"音频生成失败: {str(e)}"
)
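A similar client sketch for the TTS routes (illustrative only; host and port are assumptions): it fetches the voice catalogue, picks a voice from the "通用场景" category defined in ByteDanceTTS.TTS_VOICES, and requests an MP3.

import requests

BASE_URL = "http://127.0.0.1:8000"  # hypothetical local deployment

# Fetch the full voice catalogue and pick the first voice of the "通用场景" category
catalogue = requests.get(f"{BASE_URL}/api/tts/voices/all").json()["data"]
voice_type = next(iter(catalogue["通用场景"]))

payload = {
    "text": "你好,世界",
    "voice_type": voice_type,
    "speed_ratio": 1.0,
    "encoding": "mp3"
}
resp = requests.post(f"{BASE_URL}/api/tts/generate", json=payload, timeout=120)
print(resp.json()["message"])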


@@ -27,7 +27,8 @@ from Routes.TeachingModel.tasks.BackgroundTasks import train_document_task
from Routes.XueBanRoute import router as xueban_router
from Routes.ZuoWen import router as zuowen_router
from Routes.RecognizeEduQuestion import router as ocr_router
from Routes.VideoRetalkRoute import router as videoRetalk_router
from Routes.ttsRoute import router as tts_router
# Control log output
logger = logging.getLogger('lightrag')
logger.setLevel(logging.INFO)
@@ -80,6 +81,10 @@ app.include_router(mj_router) # Midjourney路由
app.include_router(qwen_image_router) # Qwen Image routes
app.include_router(ocr_router) # Education-scene question recognition
app.include_router(videoRetalk_router) # Video retalk (talking-portrait video)
app.include_router(tts_router) # Text-to-speech
# Teaching Model routes
# Login routes (no authentication required)
app.include_router(login_router, prefix="/api/login", tags=["login"])
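Note that global_exception_handler is defined in Routes.VideoRetalkRoute but is not registered anywhere in this hunk. If it is meant to be active, one option (an assumption, not shown in this commit) is to register it on the app next to the routers:

from Routes.VideoRetalkRoute import global_exception_handler

# Optional: route all unhandled exceptions through the shared handler (not part of this diff)
app.add_exception_handler(Exception, global_exception_handler)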


@@ -1,73 +0,0 @@
#coding=utf-8
'''
requires Python 3.6 or later
pip install requests
'''
import base64
import json
import uuid
import requests
from Config.Config import HS_APP_ID, HS_ACCESS_TOKEN, HS_CLUSTER_ID, HS_VOICE_TYPE_QINCANG
# Fill in the appid, access_token and cluster obtained from the platform
appid = HS_APP_ID
access_token= HS_ACCESS_TOKEN
cluster = HS_CLUSTER_ID
voice_type = HS_VOICE_TYPE_QINCANG
host = "openspeech.bytedance.com"
api_url = f"https://{host}/api/v1/tts"
header = {"Authorization": f"Bearer;{access_token}"}
request_json = {
"app": {
"appid": appid,
"token": "access_token",
"cluster": cluster
},
"user": {
"uid": "388808087185088"
},
"audio": {
"voice_type": voice_type,
"encoding": "mp3",
"speed_ratio": 1.0,
"volume_ratio": 1.0,
"pitch_ratio": 1.0,
},
"request": {
"reqid": str(uuid.uuid4()),
"text": """
君不见,黄河之水天上来,奔流到海不复回。
君不见,高堂明镜悲白发,朝如青丝暮成雪。
人生得意须尽欢,莫使金樽空对月。
天生我材必有用,千金散尽还复来。
烹羊宰牛且为乐,会须一饮三百杯。
岑夫子,丹丘生,将进酒,杯莫停。
与君歌一曲,请君为我倾耳听。
钟鼓馔玉不足贵,但愿长醉不复醒。
古来圣贤皆寂寞,惟有饮者留其名。
陈王昔时宴平乐,斗酒十千恣欢谑。
主人何为言少钱,径须沽取对君酌。
五花马,千金裘,呼儿将出换美酒,与尔同销万古愁。
""",
"text_type": "plain",
"operation": "query",
"with_frontend": 1,
"frontend_type": "unitTson"
}
}
if __name__ == '__main__':
try:
resp = requests.post(api_url, json.dumps(request_json), headers=header)
#print(f"resp body: \n{resp.json()}")
if "data" in resp.json():
data = resp.json()["data"]
file_to_save = open("test_submit.mp3", "wb")
file_to_save.write(base64.b64decode(data))
except Exception as e:
e.with_traceback()


@@ -0,0 +1,284 @@
#coding=utf-8
'''
ByteDance speech synthesis (TTS) API wrapper
requires Python 3.6 or later
pip install requests
'''
import base64
import json
import uuid
import requests
from typing import Optional, Dict, Any
from pathlib import Path
from Config.Config import HS_APP_ID, HS_ACCESS_TOKEN, HS_CLUSTER_ID, HS_VOICE_TYPE_QINCANG
# Voice catalogue (grouped by scenario) used by the ByteDanceTTS class below
class ByteDanceTTS:
"""
Wrapper around the ByteDance speech synthesis API.
Provides text-to-speech functionality.
"""
# Voice catalogue, grouped by usage scenario
TTS_VOICES = {
"通用场景": {
"zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
"zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
"zh_female_xiaoxin_common": "小新(女声,自然流畅)",
"zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
},
"有声阅读": {
"zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
"zh_female_xiaoxin_common": "小新(女声,自然流畅)",
"zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)",
"zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)"
},
"智能助手": {
"zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
"zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
"zh_female_xiaoxin_common": "小新(女声,自然流畅)",
"zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
},
"视频配音": {
"zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
"zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)",
"zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)",
"zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
},
"特色音色": {
"zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
"zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)"
},
"广告配音": {
"zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
"zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)"
},
"新闻播报": {
"zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)",
"zh_male_xiaofeng_common": "小峰(男声,沉稳大气)"
},
"教育场景": {
"zh_female_xiaoxin_common": "小新(女声,自然流畅)",
"zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
}
}
def __init__(self,
app_id: Optional[str] = None,
access_token: Optional[str] = None,
cluster_id: Optional[str] = None,
voice_type: Optional[str] = None):
"""
Initialize the TTS wrapper.
Args:
app_id: application ID; defaults to HS_APP_ID from Config
access_token: access token; defaults to HS_ACCESS_TOKEN from Config
cluster_id: cluster ID; defaults to HS_CLUSTER_ID from Config
voice_type: voice type; defaults to HS_VOICE_TYPE_QINCANG from Config
"""
self.app_id = app_id or HS_APP_ID
self.access_token = access_token or HS_ACCESS_TOKEN
self.cluster_id = cluster_id or HS_CLUSTER_ID
self.voice_type = voice_type or HS_VOICE_TYPE_QINCANG
self.host = "openspeech.bytedance.com"
self.api_url = f"https://{self.host}/api/v1/tts"
self.header = {"Authorization": f"Bearer;{self.access_token}"}
def generate_audio(self,
text: str,
output_path: Optional[str] = None,
voice_type: Optional[str] = None,
encoding: str = "mp3",
speed_ratio: float = 1.0,
volume_ratio: float = 1.0,
pitch_ratio: float = 1.0,
text_type: str = "plain",
operation: str = "query") -> Optional[bytes]:
"""
Generate speech audio.
Args:
text: the text to synthesize
output_path: output file path; if provided, the audio is also saved to disk
voice_type: voice type, overriding the value set at initialization
encoding: audio encoding format, default mp3
speed_ratio: speech speed ratio, default 1.0
volume_ratio: volume ratio, default 1.0
pitch_ratio: pitch ratio, default 1.0
text_type: text type, default plain
operation: operation type, default query
Returns:
bytes: raw audio data, or None on failure
"""
# Build the request payload
request_json = {
"app": {
"appid": self.app_id,
"token": "access_token",
"cluster": self.cluster_id
},
"user": {
"uid": str(uuid.uuid4()) # 使用随机用户ID
},
"audio": {
"voice_type": voice_type or self.voice_type,
"encoding": encoding,
"speed_ratio": speed_ratio,
"volume_ratio": volume_ratio,
"pitch_ratio": pitch_ratio,
},
"request": {
"reqid": str(uuid.uuid4()),
"text": text,
"text_type": text_type,
"operation": operation,
"with_frontend": 1,
"frontend_type": "unitTson"
}
}
try:
# Send the request
resp = requests.post(self.api_url, json.dumps(request_json), headers=self.header)
resp.raise_for_status()
resp_data = resp.json()
if "data" in resp_data:
audio_data = base64.b64decode(resp_data["data"])
# Save to file if an output path was provided
if output_path:
self.save_audio(audio_data, output_path)
return audio_data
else:
print(f"API响应中未包含音频数据: {resp_data}")
return None
except requests.exceptions.RequestException as e:
print(f"请求失败: {e}")
return None
except Exception as e:
print(f"生成音频失败: {e}")
return None
def save_audio(self, audio_data: bytes, output_path: str) -> bool:
"""
Save audio data to a file.
Args:
audio_data: raw audio bytes
output_path: output file path
Returns:
bool: whether the save succeeded
"""
try:
# Make sure the target directory exists
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
with open(output_path, "wb") as f:
f.write(audio_data)
print(f"音频已保存到: {output_path}")
return True
except Exception as e:
print(f"保存音频失败: {e}")
return False
def get_audio_info(self, audio_data: bytes) -> Dict[str, Any]:
"""
Get basic information about an audio payload.
Args:
audio_data: raw audio bytes
Returns:
Dict: audio size and format information
"""
return {
"size_bytes": len(audio_data),
"size_kb": len(audio_data) / 1024,
"format": "mp3" # 目前固定为mp3格式
}
def get_voices_by_category(self, category: str) -> Dict[str, str]:
"""
Get the voices in a category.
Args:
category: category name
Returns:
Dict: mapping of voice_type to a human-readable voice description
"""
return self.TTS_VOICES.get(category, {})
def get_all_categories(self) -> list:
"""
Get all voice categories.
Returns:
list: category names
"""
return list(self.TTS_VOICES.keys())
def get_all_voices(self) -> Dict[str, Dict[str, str]]:
"""
Get the full voice catalogue.
Returns:
Dict: every category with its voices
"""
return self.TTS_VOICES
def main():
"""示例用法"""
# 创建语音合成实例
tts = ByteDanceTTS()
# 要转换的文本
text = """
君不见,黄河之水天上来,奔流到海不复回。
君不见,高堂明镜悲白发,朝如青丝暮成雪。
人生得意须尽欢,莫使金樽空对月。
天生我材必有用,千金散尽还复来。
烹羊宰牛且为乐,会须一饮三百杯。
岑夫子,丹丘生,将进酒,杯莫停。
与君歌一曲,请君为我倾耳听。
钟鼓馔玉不足贵,但愿长醉不复醒。
古来圣贤皆寂寞,惟有饮者留其名。
陈王昔时宴平乐,斗酒十千恣欢谑。
主人何为言少钱,径须沽取对君酌。
五花马,千金裘,呼儿将出换美酒,与尔同销万古愁。
"""
# Generate the audio and save it to a file
audio_data = tts.generate_audio(
text=text,
output_path="test_submit.mp3",
voice_type=HS_VOICE_TYPE_QINCANG,
speed_ratio=1.0,
volume_ratio=1.0
)
if audio_data:
# Get audio info
info = tts.get_audio_info(audio_data)
print(f"音频生成成功,大小: {info['size_kb']:.2f} KB")
else:
print("音频生成失败")
if __name__ == '__main__':
main()


@@ -0,0 +1,539 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>文本转语音</title>
<style>
* {
margin: 0; padding: 0; box-sizing: border-box;
font-family: 'PingFang SC', 'Microsoft YaHei', sans-serif;
}
body {
background-color: #f5f7fa;
color: #333;
line-height: 1.6;
}
.container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
header {
text-align: center;
padding: 40px 20px;
background: linear-gradient(135deg, #3498db, #8e44ad);
color: white;
border-radius: 10px;
margin-bottom: 30px;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
}
h1 {
font-size: 2.2rem;
margin-bottom: 10px;
font-weight: 700;
}
.subtitle {
font-size: 1.1rem;
opacity: 0.9;
max-width: 800px;
margin: 0 auto;
}
.main-content {
display: flex;
flex-wrap: wrap;
gap: 30px;
margin-bottom: 30px;
}
.form-section {
flex: 1;
min-width: 300px;
background-color: white;
border-radius: 10px;
padding: 25px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
.result-section {
flex: 1;
min-width: 300px;
background-color: white;
border-radius: 10px;
padding: 25px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
.form-group {
margin-bottom: 20px;
}
label {
display: block;
margin-bottom: 8px;
font-weight: 500;
color: #2c3e50;
}
select, textarea {
width: 100%;
padding: 12px;
border: 1px solid #ddd;
border-radius: 6px;
font-size: 16px;
transition: border 0.3s;
}
select:focus, textarea:focus {
border-color: #3498db;
outline: none;
}
textarea {
min-height: 150px;
resize: vertical;
}
.btn {
padding: 12px 24px;
border: none;
border-radius: 6px;
cursor: pointer;
font-size: 16px;
transition: background 0.3s;
display: inline-block;
text-align: center;
}
.btn-primary {
background-color: #3498db;
color: white;
}
.btn-primary:hover {
background-color: #2980b9;
}
.btn:disabled {
background-color: #95a5a6;
cursor: not-allowed;
}
.audio-player {
margin-top: 20px;
width: 100%;
}
.loading {
display: none;
text-align: center;
margin: 20px 0;
}
.loading.active {
display: block;
}
.spinner {
border: 4px solid rgba(0, 0, 0, 0.1);
border-radius: 50%;
border-top: 4px solid #3498db;
width: 40px;
height: 40px;
animation: spin 1s linear infinite;
margin: 0 auto 15px;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.voice-options {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 15px;
margin-top: 15px;
min-height: 100px;
}
.voice-card {
border: 1px solid #ddd;
border-radius: 8px;
padding: 15px;
cursor: pointer;
transition: all 0.3s;
}
.voice-options p {
color: #666;
text-align: center;
padding: 20px;
}
.voice-card:hover {
border-color: #3498db;
box-shadow: 0 2px 8px rgba(52, 152, 219, 0.2);
}
.voice-card.selected {
border-color: #3498db;
background-color: #e8f4fd;
}
.voice-name {
font-weight: 500;
margin-bottom: 5px;
}
.voice-description {
font-size: 0.9rem;
color: #666;
}
.error-message {
color: #e74c3c;
margin-top: 10px;
display: none;
}
.success-message {
color: #2ecc71;
margin-top: 10px;
display: none;
}
@media screen and (max-width: 768px) {
.main-content {
flex-direction: column;
}
h1 {
font-size: 1.8rem;
}
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>文本转语音</h1>
<p class="subtitle">选择音色,输入文本,生成高质量语音</p>
</header>
<div class="main-content">
<div class="form-section">
<h2>语音设置</h2>
<div class="form-group">
<label for="category-select">音色分类</label>
<select id="category-select">
<option value="">请选择音色分类</option>
</select>
</div>
<div class="form-group">
<label>选择音色</label>
<div id="voice-options" class="voice-options">
<p>请先选择音色分类</p>
</div>
</div>
<div class="form-group">
<label for="text-input">输入文本</label>
<textarea id="text-input" placeholder="请输入要转换为语音的文本..."></textarea>
</div>
<div class="form-group">
<label for="speed-ratio">语速</label>
<select id="speed-ratio">
<option value="0.8">较慢</option>
<option value="1.0" selected>正常</option>
<option value="1.2">较快</option>
</select>
</div>
<div class="form-group">
<label for="volume-ratio">音量</label>
<select id="volume-ratio">
<option value="0.8">较小</option>
<option value="1.0" selected>正常</option>
<option value="1.2">较大</option>
</select>
</div>
<div class="form-group">
<label for="pitch-ratio">音调</label>
<select id="pitch-ratio">
<option value="0.8">较低</option>
<option value="1.0" selected>正常</option>
<option value="1.2">较高</option>
</select>
</div>
<button id="generate-btn" class="btn btn-primary">生成语音</button>
<div id="error-message" class="error-message"></div>
<div id="success-message" class="success-message"></div>
</div>
<div class="result-section">
<h2>生成结果</h2>
<div id="loading" class="loading">
<div class="spinner"></div>
<p>正在生成语音,请稍候...</p>
</div>
<div id="audio-result" style="display: none;">
<audio id="audio-player" class="audio-player" controls></audio>
<div class="form-group" style="margin-top: 20px;">
<button id="download-btn" class="btn btn-primary">下载音频</button>
</div>
</div>
<div id="empty-result" style="text-align: center; padding: 40px 0; color: #999;">
<p>暂无生成结果</p>
</div>
</div>
</div>
</div>
<script>
document.addEventListener('DOMContentLoaded', function() {
// Grab the DOM elements we need
const categorySelect = document.getElementById('category-select');
const voiceOptions = document.getElementById('voice-options');
const textInput = document.getElementById('text-input');
const speedRatio = document.getElementById('speed-ratio');
const volumeRatio = document.getElementById('volume-ratio');
const pitchRatio = document.getElementById('pitch-ratio');
const generateBtn = document.getElementById('generate-btn');
const loading = document.getElementById('loading');
const audioResult = document.getElementById('audio-result');
const audioPlayer = document.getElementById('audio-player');
const downloadBtn = document.getElementById('download-btn');
const emptyResult = document.getElementById('empty-result');
const errorMessage = document.getElementById('error-message');
const successMessage = document.getElementById('success-message');
// Currently selected voice
let selectedVoiceType = null;
// API base URL for the TTS routes (the backend mounts them under /api/tts)
const apiBaseUrl = '/api/tts';
// Load all voice categories
async function loadVoiceCategories() {
try {
const response = await fetch(`${apiBaseUrl}/voices/categories`);
const data = await response.json();
if (data.success) {
// Clear existing options
categorySelect.innerHTML = '<option value="">请选择音色分类</option>';
// Add an option per category
data.data.forEach(category => {
const option = document.createElement('option');
option.value = category;
option.textContent = category;
categorySelect.appendChild(option);
});
} else {
showError('获取音色分类失败: ' + data.message);
}
} catch (error) {
showError('获取音色分类失败: ' + error.message);
}
}
// Load the voices for a given category
async function loadVoicesByCategory(category) {
try {
const response = await fetch(`${apiBaseUrl}/voices/by-category/${category}`);
const data = await response.json();
if (data.success) {
// Clear existing voice options
voiceOptions.innerHTML = '';
// Basic type check on the returned data
if (typeof data.data !== 'object' || data.data === null) {
showError('获取的音色列表格式不正确');
return;
}
// Convert the object into an array: [{voice_type, name, description}, ...]
const voicesArray = Object.entries(data.data).map(([voiceType, description]) => {
// Extract the name and description (the catalogue uses the "名称(说明)" format with full-width parentheses)
const match = description.match(/^(.*?)[（(](.*?)[）)]$/);
return {
voice_type: voiceType,
name: match ? match[1] : description,
description: match ? match[2] : '无描述'
};
});
// Check whether the array is empty
if (voicesArray.length === 0) {
voiceOptions.innerHTML = '<p>该分类下没有可用音色</p>';
return;
}
// Render a card for each voice
voicesArray.forEach(voice => {
const voiceCard = document.createElement('div');
voiceCard.className = 'voice-card';
voiceCard.dataset.voiceType = voice.voice_type;
const voiceName = document.createElement('div');
voiceName.className = 'voice-name';
voiceName.textContent = voice.name;
const voiceDescription = document.createElement('div');
voiceDescription.className = 'voice-description';
voiceDescription.textContent = voice.description || '暂无描述';
voiceCard.appendChild(voiceName);
voiceCard.appendChild(voiceDescription);
// Click handler: select this voice
voiceCard.addEventListener('click', function() {
// Clear the selected state on the other cards
document.querySelectorAll('.voice-card').forEach(card => {
card.classList.remove('selected');
});
// Mark this card as selected
this.classList.add('selected');
// Remember the selected voice type
selectedVoiceType = this.dataset.voiceType;
});
voiceOptions.appendChild(voiceCard);
});
} else {
voiceOptions.innerHTML = '<p>获取音色列表失败: ' + data.message + '</p>';
}
} catch (error) {
voiceOptions.innerHTML = '<p>获取音色列表失败: ' + error.message + '</p>';
}
}
// Generate speech
async function generateAudio() {
// Validate input
if (!selectedVoiceType) {
showError('请选择音色');
return;
}
if (!textInput.value.trim()) {
showError('请输入要转换的文本');
return;
}
// Hide previous messages
hideMessages();
// Show the loading state
loading.classList.add('active');
audioResult.style.display = 'none';
emptyResult.style.display = 'none';
generateBtn.disabled = true;
try {
// Build the request payload
const requestData = {
text: textInput.value.trim(),
voice_type: selectedVoiceType,
speed_ratio: parseFloat(speedRatio.value),
volume_ratio: parseFloat(volumeRatio.value),
pitch_ratio: parseFloat(pitchRatio.value),
encoding: 'mp3'
};
// Send the request to the TTS generate endpoint
const response = await fetch(`${apiBaseUrl}/generate`, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(requestData)
});
const data = await response.json();
if (data.success) {
// Show a success message
showSuccess('语音生成成功');
// Wire up the audio player
audioPlayer.src = data.audio_url;
audioResult.style.display = 'block';
// Wire up the download button
downloadBtn.onclick = function() {
const a = document.createElement('a');
a.href = data.audio_url;
a.download = 'tts_audio.mp3';
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
};
} else {
showError('语音生成失败: ' + data.message);
emptyResult.style.display = 'block';
}
} catch (error) {
showError('语音生成失败: ' + error.message);
emptyResult.style.display = 'block';
} finally {
// Hide the loading state
loading.classList.remove('active');
generateBtn.disabled = false;
}
}
// Show an error message
function showError(message) {
errorMessage.textContent = message;
errorMessage.style.display = 'block';
successMessage.style.display = 'none';
}
// Show a success message
function showSuccess(message) {
successMessage.textContent = message;
successMessage.style.display = 'block';
errorMessage.style.display = 'none';
}
// Hide all messages
function hideMessages() {
errorMessage.style.display = 'none';
successMessage.style.display = 'none';
}
// Event listeners
categorySelect.addEventListener('change', function() {
const category = this.value;
if (category) {
loadVoicesByCategory(category);
} else {
voiceOptions.innerHTML = '<p>请先选择音色分类</p>';
selectedVoiceType = null;
}
});
generateBtn.addEventListener('click', generateAudio);
// Initial load
loadVoiceCategories();
});
</script>
</body>
</html>