This commit is contained in:
2025-09-02 06:55:13 +08:00
parent 4078acb909
commit 1b959b3ba9
11 changed files with 1106 additions and 74 deletions


@@ -0,0 +1,132 @@
import datetime
import logging
import uuid
from typing import Optional
from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from Config import Config
from Util.VideoRetalk import VideoRetalk
# Create the video generation router
router = APIRouter(prefix="/api/video", tags=["视频生成"])
# Configure logging
logger = logging.getLogger(__name__)
# Only video-related model definitions are kept in this module
class VideoRetalkRequest(BaseModel):
"""视频生成请求参数"""
image_url: str
audio_url: str
template_id: Optional[str] = "normal"
eye_move_freq: Optional[float] = 0.5
video_fps: Optional[int] = 30
mouth_move_strength: Optional[float] = 1.0
paste_back: Optional[bool] = True
head_move_strength: Optional[float] = 0.7
class VideoRetalkResponse(BaseModel):
"""视频生成响应"""
success: bool
message: str
task_id: Optional[str] = None
video_url: Optional[str] = None
video_duration: Optional[float] = None
video_ratio: Optional[str] = None
request_id: Optional[str] = None
@router.post("/generate", response_model=VideoRetalkResponse)
async def generate_video(request: VideoRetalkRequest):
"""
Generate a talking-portrait video.
Given a portrait image and an audio clip, produce a video whose lip movements match the audio.
"""
try:
# Initialize the VideoRetalk instance
video_retalk = VideoRetalk(Config.ALY_LLM_API_KEY)
# Call the video generation method
video_url = video_retalk.generate_video(
image_url=request.image_url,
audio_url=request.audio_url,
template_id=request.template_id,
eye_move_freq=request.eye_move_freq,
video_fps=request.video_fps,
mouth_move_strength=request.mouth_move_strength,
paste_back=request.paste_back,
head_move_strength=request.head_move_strength
)
if video_url:
return VideoRetalkResponse(
success=True,
message="视频生成成功",
video_url=video_url,
# In a real implementation the fields below should be taken from the API response
task_id=str(uuid.uuid4()),
video_duration=10.23, # placeholder; should come from the API response
video_ratio="standard", # placeholder; should come from the API response
request_id=str(uuid.uuid4())
)
else:
return VideoRetalkResponse(
success=False,
message="视频生成失败"
)
except Exception as e:
logger.error(f"视频生成接口错误: {e}")
raise HTTPException(
status_code=500,
detail=f"视频生成失败: {str(e)}"
)
@router.get("/task/status")
async def get_task_status(task_id: str = Query(..., description="任务ID")):
"""
Query the status of a video generation task.
"""
try:
video_retalk = VideoRetalk(Config.ALY_LLM_API_KEY)
task_status = video_retalk.get_task_status(task_id)
return {
"success": True,
"data": task_status
}
except Exception as e:
logger.error(f"查询任务状态错误: {e}")
raise HTTPException(
status_code=500,
detail=f"查询任务状态失败: {str(e)}"
)
@router.get("/health")
async def health_check():
"""
Health check endpoint.
"""
return {
"status": "healthy",
"timestamp": datetime.datetime.now().isoformat(),
"service": "VideoRetalk API"
}
# Global exception handling (kept)
def global_exception_handler(request: Request, exc: Exception):
logger.error(f"全局异常: {exc}")
return JSONResponse(
status_code=500,
content={"success": False, "message": f"服务器内部错误: {str(exc)}"}
)
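For reference, a minimal client sketch for the new route (not part of this commit; the host, port and input URLs below are assumptions): it posts a portrait image URL and an audio URL to /api/video/generate and prints the returned video URL.

import requests

BASE_URL = "http://127.0.0.1:8000"  # hypothetical local deployment

payload = {
    "image_url": "https://example.com/portrait.png",  # hypothetical input image
    "audio_url": "https://example.com/speech.mp3",    # hypothetical input audio
    "template_id": "normal",
    "video_fps": 30
}

# Video generation is slow, so allow a generous timeout
resp = requests.post(f"{BASE_URL}/api/video/generate", json=payload, timeout=600)
resp.raise_for_status()
data = resp.json()
print(data["video_url"] if data["success"] else data["message"])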

Binary file not shown.


@@ -0,0 +1,145 @@
import base64
import logging
import uuid
from typing import Optional
from fastapi import APIRouter, HTTPException
from pydantic import BaseModel
from Util.GengerateAudio import ByteDanceTTS
# Create the text-to-speech router
router = APIRouter(prefix="/api/tts", tags=["声音生成"])
# Configure logging
logger = logging.getLogger(__name__)
# Initialize a module-level TTS instance
tts_instance = ByteDanceTTS()
class TextToSpeechRequest(BaseModel):
"""文本转语音请求参数"""
text: str
voice_type: Optional[str] = None
speed_ratio: Optional[float] = 1.0
volume_ratio: Optional[float] = 1.0
pitch_ratio: Optional[float] = 1.0
encoding: Optional[str] = "mp3"
class TextToSpeechResponse(BaseModel):
"""文本转语音响应"""
success: bool
message: str
audio_url: Optional[str] = None
audio_size: Optional[float] = None
audio_format: Optional[str] = None
request_id: Optional[str] = None
@router.get("/voices/categories")
async def get_voice_categories():
"""
List all voice categories.
Returns every available voice category.
"""
try:
categories = tts_instance.get_all_categories()
return {
"success": True,
"data": categories,
"message": "获取音色分类成功"
}
except Exception as e:
logger.error(f"获取音色分类错误: {e}")
raise HTTPException(
status_code=500,
detail=f"获取音色分类失败: {str(e)}"
)
@router.get("/voices/by-category/{category}")
async def get_voices_by_category(category: str):
"""
List the voices in a category.
Args:
category: voice category name
Returns all voices under the given category.
"""
try:
voices = tts_instance.get_voices_by_category(category)
if not voices:
return {
"success": False,
"message": f"未找到分类 '{category}' 下的音色"
}
return {
"success": True,
"data": voices,
"message": f"获取分类 '{category}' 下的音色成功"
}
except Exception as e:
logger.error(f"获取分类 '{category}' 下的音色错误: {e}")
raise HTTPException(
status_code=500,
detail=f"获取分类 '{category}' 下的音色失败: {str(e)}"
)
@router.get("/voices/all")
async def get_all_voices():
"""
List all voice categories together with the voices in each category.
"""
try:
all_voices = tts_instance.get_all_voices()
return {
"success": True,
"data": all_voices,
"message": "获取所有音色分类和列表成功"
}
except Exception as e:
logger.error(f"获取所有音色分类和列表错误: {e}")
raise HTTPException(
status_code=500,
detail=f"获取所有音色分类和列表失败: {str(e)}"
)
@router.post("/generate", response_model=TextToSpeechResponse)
async def generate_audio(request: TextToSpeechRequest):
"""
Text-to-speech endpoint.
Generate audio from the input text and voice parameters.
"""
try:
# Call the TTS helper; ByteDanceTTS.generate_audio returns raw audio bytes (or None on failure)
audio_data = tts_instance.generate_audio(
text=request.text,
voice_type=request.voice_type,
speed_ratio=request.speed_ratio,
volume_ratio=request.volume_ratio,
pitch_ratio=request.pitch_ratio,
encoding=request.encoding
)
if audio_data:
# The helper returns bytes, not a URL; embed them as a data URL so the JSON
# response stays serializable and the page can play/download it directly.
audio_url = f"data:audio/{request.encoding};base64,{base64.b64encode(audio_data).decode()}"
return TextToSpeechResponse(
success=True,
message="音频生成成功",
audio_url=audio_url,
audio_size=round(len(audio_data) / 1024, 2),  # size in KB
audio_format=request.encoding,
request_id=str(uuid.uuid4())
)
else:
return TextToSpeechResponse(
success=False,
message="音频生成失败"
)
except Exception as e:
logger.error(f"文本转语音接口错误: {e}")
raise HTTPException(
status_code=500,
detail=f"音频生成失败: {str(e)}"
)
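A similar client sketch for the TTS routes (illustrative only; host and port are assumptions): it fetches the voice catalogue, picks a voice from the "通用场景" category defined in ByteDanceTTS.TTS_VOICES, and requests an MP3.

import requests

BASE_URL = "http://127.0.0.1:8000"  # hypothetical local deployment

# Fetch the full voice catalogue and pick the first voice of the "通用场景" category
catalogue = requests.get(f"{BASE_URL}/api/tts/voices/all").json()["data"]
voice_type = next(iter(catalogue["通用场景"]))

payload = {
    "text": "你好,世界",
    "voice_type": voice_type,
    "speed_ratio": 1.0,
    "encoding": "mp3"
}
resp = requests.post(f"{BASE_URL}/api/tts/generate", json=payload, timeout=120)
print(resp.json()["message"])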


@@ -27,7 +27,8 @@ from Routes.TeachingModel.tasks.BackgroundTasks import train_document_task
from Routes.XueBanRoute import router as xueban_router
from Routes.ZuoWen import router as zuowen_router
from Routes.RecognizeEduQuestion import router as ocr_router
from Routes.VideoRetalkRoute import router as videoRetalk_router
from Routes.ttsRoute import router as tts_router
# Control log output
logger = logging.getLogger('lightrag')
logger.setLevel(logging.INFO)
@@ -80,6 +81,10 @@ app.include_router(mj_router) # Midjourney路由
app.include_router(qwen_image_router) # Qwen Image routes
app.include_router(ocr_router) # Education-scene question recognition
app.include_router(videoRetalk_router) # Video retalk (talking-portrait video)
app.include_router(tts_router) # Text-to-speech
# Teaching Model routes
# Login routes (no authentication required)
app.include_router(login_router, prefix="/api/login", tags=["login"])
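Note that global_exception_handler is defined in Routes.VideoRetalkRoute but is not registered anywhere in this hunk. If it is meant to be active, one option (an assumption, not shown in this commit) is to register it on the app next to the routers:

from Routes.VideoRetalkRoute import global_exception_handler

# Optional: route all unhandled exceptions through the shared handler (not part of this diff)
app.add_exception_handler(Exception, global_exception_handler)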


@@ -1,73 +0,0 @@
#coding=utf-8
'''
requires Python 3.6 or later
pip install requests
'''
import base64
import json
import uuid
import requests
from Config.Config import HS_APP_ID, HS_ACCESS_TOKEN, HS_CLUSTER_ID, HS_VOICE_TYPE_QINCANG
# Fill in the appid, access_token and cluster obtained from the platform
appid = HS_APP_ID
access_token= HS_ACCESS_TOKEN
cluster = HS_CLUSTER_ID
voice_type = HS_VOICE_TYPE_QINCANG
host = "openspeech.bytedance.com"
api_url = f"https://{host}/api/v1/tts"
header = {"Authorization": f"Bearer;{access_token}"}
request_json = {
"app": {
"appid": appid,
"token": "access_token",
"cluster": cluster
},
"user": {
"uid": "388808087185088"
},
"audio": {
"voice_type": voice_type,
"encoding": "mp3",
"speed_ratio": 1.0,
"volume_ratio": 1.0,
"pitch_ratio": 1.0,
},
"request": {
"reqid": str(uuid.uuid4()),
"text": """
君不见,黄河之水天上来,奔流到海不复回。
君不见,高堂明镜悲白发,朝如青丝暮成雪。
人生得意须尽欢,莫使金樽空对月。
天生我材必有用,千金散尽还复来。
烹羊宰牛且为乐,会须一饮三百杯。
岑夫子,丹丘生,将进酒,杯莫停。
与君歌一曲,请君为我倾耳听。
钟鼓馔玉不足贵,但愿长醉不复醒。
古来圣贤皆寂寞,惟有饮者留其名。
陈王昔时宴平乐,斗酒十千恣欢谑。
主人何为言少钱,径须沽取对君酌。
五花马,千金裘,呼儿将出换美酒,与尔同销万古愁。
""",
"text_type": "plain",
"operation": "query",
"with_frontend": 1,
"frontend_type": "unitTson"
}
}
if __name__ == '__main__':
try:
resp = requests.post(api_url, json.dumps(request_json), headers=header)
#print(f"resp body: \n{resp.json()}")
if "data" in resp.json():
data = resp.json()["data"]
file_to_save = open("test_submit.mp3", "wb")
file_to_save.write(base64.b64decode(data))
except Exception as e:
e.with_traceback()


@@ -0,0 +1,284 @@
#coding=utf-8
'''
ByteDance speech synthesis (TTS) API wrapper
requires Python 3.6 or later
pip install requests
'''
import base64
import json
import uuid
import requests
from typing import Optional, Dict, Any
from pathlib import Path
from Config.Config import HS_APP_ID, HS_ACCESS_TOKEN, HS_CLUSTER_ID, HS_VOICE_TYPE_QINCANG
# Voice catalogue (grouped by scenario) used by the ByteDanceTTS class below
class ByteDanceTTS:
"""
Wrapper around the ByteDance speech synthesis API.
Provides text-to-speech functionality.
"""
# Voice catalogue, grouped by usage scenario
TTS_VOICES = {
"通用场景": {
"zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
"zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
"zh_female_xiaoxin_common": "小新(女声,自然流畅)",
"zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
},
"有声阅读": {
"zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
"zh_female_xiaoxin_common": "小新(女声,自然流畅)",
"zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)",
"zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)"
},
"智能助手": {
"zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
"zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
"zh_female_xiaoxin_common": "小新(女声,自然流畅)",
"zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
},
"视频配音": {
"zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
"zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)",
"zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)",
"zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
},
"特色音色": {
"zh_female_xiaoxue_moon_bigtts": "小雪(女声,温柔亲切)",
"zh_female_xiaomei_moon_bigtts": "小美(女声,甜美温柔)"
},
"广告配音": {
"zh_male_xiaofeng_common": "小峰(男声,沉稳大气)",
"zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)"
},
"新闻播报": {
"zh_female_xiaoli_moon_bigtts": "小丽(女声,清晰标准)",
"zh_male_xiaofeng_common": "小峰(男声,沉稳大气)"
},
"教育场景": {
"zh_female_xiaoxin_common": "小新(女声,自然流畅)",
"zh_male_xiaoyu_common": "小鱼(男声,年轻活力)"
}
}
def __init__(self,
app_id: Optional[str] = None,
access_token: Optional[str] = None,
cluster_id: Optional[str] = None,
voice_type: Optional[str] = None):
"""
Initialize the TTS wrapper.
Args:
app_id: application ID; defaults to HS_APP_ID from Config
access_token: access token; defaults to HS_ACCESS_TOKEN from Config
cluster_id: cluster ID; defaults to HS_CLUSTER_ID from Config
voice_type: voice type; defaults to HS_VOICE_TYPE_QINCANG from Config
"""
self.app_id = app_id or HS_APP_ID
self.access_token = access_token or HS_ACCESS_TOKEN
self.cluster_id = cluster_id or HS_CLUSTER_ID
self.voice_type = voice_type or HS_VOICE_TYPE_QINCANG
self.host = "openspeech.bytedance.com"
self.api_url = f"https://{self.host}/api/v1/tts"
self.header = {"Authorization": f"Bearer;{self.access_token}"}
def generate_audio(self,
text: str,
output_path: Optional[str] = None,
voice_type: Optional[str] = None,
encoding: str = "mp3",
speed_ratio: float = 1.0,
volume_ratio: float = 1.0,
pitch_ratio: float = 1.0,
text_type: str = "plain",
operation: str = "query") -> Optional[bytes]:
"""
Generate speech audio.
Args:
text: the text to synthesize
output_path: output file path; if provided, the audio is also saved to disk
voice_type: voice type, overriding the value set at initialization
encoding: audio encoding format, default mp3
speed_ratio: speech speed ratio, default 1.0
volume_ratio: volume ratio, default 1.0
pitch_ratio: pitch ratio, default 1.0
text_type: text type, default plain
operation: operation type, default query
Returns:
bytes: raw audio data, or None on failure
"""
# Build the request payload
request_json = {
"app": {
"appid": self.app_id,
"token": "access_token",
"cluster": self.cluster_id
},
"user": {
"uid": str(uuid.uuid4()) # 使用随机用户ID
},
"audio": {
"voice_type": voice_type or self.voice_type,
"encoding": encoding,
"speed_ratio": speed_ratio,
"volume_ratio": volume_ratio,
"pitch_ratio": pitch_ratio,
},
"request": {
"reqid": str(uuid.uuid4()),
"text": text,
"text_type": text_type,
"operation": operation,
"with_frontend": 1,
"frontend_type": "unitTson"
}
}
try:
# Send the request
resp = requests.post(self.api_url, json.dumps(request_json), headers=self.header)
resp.raise_for_status()
resp_data = resp.json()
if "data" in resp_data:
audio_data = base64.b64decode(resp_data["data"])
# Save to file if an output path was provided
if output_path:
self.save_audio(audio_data, output_path)
return audio_data
else:
print(f"API响应中未包含音频数据: {resp_data}")
return None
except requests.exceptions.RequestException as e:
print(f"请求失败: {e}")
return None
except Exception as e:
print(f"生成音频失败: {e}")
return None
def save_audio(self, audio_data: bytes, output_path: str) -> bool:
"""
Save audio data to a file.
Args:
audio_data: raw audio bytes
output_path: output file path
Returns:
bool: whether the save succeeded
"""
try:
# Make sure the target directory exists
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
with open(output_path, "wb") as f:
f.write(audio_data)
print(f"音频已保存到: {output_path}")
return True
except Exception as e:
print(f"保存音频失败: {e}")
return False
def get_audio_info(self, audio_data: bytes) -> Dict[str, Any]:
"""
Get basic information about an audio payload.
Args:
audio_data: raw audio bytes
Returns:
Dict: audio size and format information
"""
return {
"size_bytes": len(audio_data),
"size_kb": len(audio_data) / 1024,
"format": "mp3" # 目前固定为mp3格式
}
def get_voices_by_category(self, category: str) -> Dict[str, str]:
"""
Get the voices in a category.
Args:
category: category name
Returns:
Dict: mapping of voice_type to a human-readable voice description
"""
return self.TTS_VOICES.get(category, {})
def get_all_categories(self) -> list:
"""
Get all voice categories.
Returns:
list: category names
"""
return list(self.TTS_VOICES.keys())
def get_all_voices(self) -> Dict[str, Dict[str, str]]:
"""
Get the full voice catalogue.
Returns:
Dict: every category with its voices
"""
return self.TTS_VOICES
def main():
"""示例用法"""
# 创建语音合成实例
tts = ByteDanceTTS()
# 要转换的文本
text = """
君不见,黄河之水天上来,奔流到海不复回。
君不见,高堂明镜悲白发,朝如青丝暮成雪。
人生得意须尽欢,莫使金樽空对月。
天生我材必有用,千金散尽还复来。
烹羊宰牛且为乐,会须一饮三百杯。
岑夫子,丹丘生,将进酒,杯莫停。
与君歌一曲,请君为我倾耳听。
钟鼓馔玉不足贵,但愿长醉不复醒。
古来圣贤皆寂寞,惟有饮者留其名。
陈王昔时宴平乐,斗酒十千恣欢谑。
主人何为言少钱,径须沽取对君酌。
五花马,千金裘,呼儿将出换美酒,与尔同销万古愁。
"""
# Generate the audio and save it to a file
audio_data = tts.generate_audio(
text=text,
output_path="test_submit.mp3",
voice_type=HS_VOICE_TYPE_QINCANG,
speed_ratio=1.0,
volume_ratio=1.0
)
if audio_data:
# Get audio info
info = tts.get_audio_info(audio_data)
print(f"音频生成成功,大小: {info['size_kb']:.2f} KB")
else:
print("音频生成失败")
if __name__ == '__main__':
main()


@@ -0,0 +1,539 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>文本转语音</title>
<style>
* {
margin: 0; padding: 0; box-sizing: border-box;
font-family: 'PingFang SC', 'Microsoft YaHei', sans-serif;
}
body {
background-color: #f5f7fa;
color: #333;
line-height: 1.6;
}
.container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
header {
text-align: center;
padding: 40px 20px;
background: linear-gradient(135deg, #3498db, #8e44ad);
color: white;
border-radius: 10px;
margin-bottom: 30px;
box-shadow: 0 4px 15px rgba(0,0,0,0.1);
}
h1 {
font-size: 2.2rem;
margin-bottom: 10px;
font-weight: 700;
}
.subtitle {
font-size: 1.1rem;
opacity: 0.9;
max-width: 800px;
margin: 0 auto;
}
.main-content {
display: flex;
flex-wrap: wrap;
gap: 30px;
margin-bottom: 30px;
}
.form-section {
flex: 1;
min-width: 300px;
background-color: white;
border-radius: 10px;
padding: 25px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
.result-section {
flex: 1;
min-width: 300px;
background-color: white;
border-radius: 10px;
padding: 25px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
.form-group {
margin-bottom: 20px;
}
label {
display: block;
margin-bottom: 8px;
font-weight: 500;
color: #2c3e50;
}
select, textarea {
width: 100%;
padding: 12px;
border: 1px solid #ddd;
border-radius: 6px;
font-size: 16px;
transition: border 0.3s;
}
select:focus, textarea:focus {
border-color: #3498db;
outline: none;
}
textarea {
min-height: 150px;
resize: vertical;
}
.btn {
padding: 12px 24px;
border: none;
border-radius: 6px;
cursor: pointer;
font-size: 16px;
transition: background 0.3s;
display: inline-block;
text-align: center;
}
.btn-primary {
background-color: #3498db;
color: white;
}
.btn-primary:hover {
background-color: #2980b9;
}
.btn:disabled {
background-color: #95a5a6;
cursor: not-allowed;
}
.audio-player {
margin-top: 20px;
width: 100%;
}
.loading {
display: none;
text-align: center;
margin: 20px 0;
}
.loading.active {
display: block;
}
.spinner {
border: 4px solid rgba(0, 0, 0, 0.1);
border-radius: 50%;
border-top: 4px solid #3498db;
width: 40px;
height: 40px;
animation: spin 1s linear infinite;
margin: 0 auto 15px;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.voice-options {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
gap: 15px;
margin-top: 15px;
min-height: 100px;
}
.voice-card {
border: 1px solid #ddd;
border-radius: 8px;
padding: 15px;
cursor: pointer;
transition: all 0.3s;
}
.voice-options p {
color: #666;
text-align: center;
padding: 20px;
}
.voice-card:hover {
border-color: #3498db;
box-shadow: 0 2px 8px rgba(52, 152, 219, 0.2);
}
.voice-card.selected {
border-color: #3498db;
background-color: #e8f4fd;
}
.voice-name {
font-weight: 500;
margin-bottom: 5px;
}
.voice-description {
font-size: 0.9rem;
color: #666;
}
.error-message {
color: #e74c3c;
margin-top: 10px;
display: none;
}
.success-message {
color: #2ecc71;
margin-top: 10px;
display: none;
}
@media screen and (max-width: 768px) {
.main-content {
flex-direction: column;
}
h1 {
font-size: 1.8rem;
}
}
</style>
</head>
<body>
<div class="container">
<header>
<h1>文本转语音</h1>
<p class="subtitle">选择音色,输入文本,生成高质量语音</p>
</header>
<div class="main-content">
<div class="form-section">
<h2>语音设置</h2>
<div class="form-group">
<label for="category-select">音色分类</label>
<select id="category-select">
<option value="">请选择音色分类</option>
</select>
</div>
<div class="form-group">
<label>选择音色</label>
<div id="voice-options" class="voice-options">
<p>请先选择音色分类</p>
</div>
</div>
<div class="form-group">
<label for="text-input">输入文本</label>
<textarea id="text-input" placeholder="请输入要转换为语音的文本..."></textarea>
</div>
<div class="form-group">
<label for="speed-ratio">语速</label>
<select id="speed-ratio">
<option value="0.8">较慢</option>
<option value="1.0" selected>正常</option>
<option value="1.2">较快</option>
</select>
</div>
<div class="form-group">
<label for="volume-ratio">音量</label>
<select id="volume-ratio">
<option value="0.8">较小</option>
<option value="1.0" selected>正常</option>
<option value="1.2">较大</option>
</select>
</div>
<div class="form-group">
<label for="pitch-ratio">音调</label>
<select id="pitch-ratio">
<option value="0.8">较低</option>
<option value="1.0" selected>正常</option>
<option value="1.2">较高</option>
</select>
</div>
<button id="generate-btn" class="btn btn-primary">生成语音</button>
<div id="error-message" class="error-message"></div>
<div id="success-message" class="success-message"></div>
</div>
<div class="result-section">
<h2>生成结果</h2>
<div id="loading" class="loading">
<div class="spinner"></div>
<p>正在生成语音,请稍候...</p>
</div>
<div id="audio-result" style="display: none;">
<audio id="audio-player" class="audio-player" controls></audio>
<div class="form-group" style="margin-top: 20px;">
<button id="download-btn" class="btn btn-primary">下载音频</button>
</div>
</div>
<div id="empty-result" style="text-align: center; padding: 40px 0; color: #999;">
<p>暂无生成结果</p>
</div>
</div>
</div>
</div>
<script>
document.addEventListener('DOMContentLoaded', function() {
// Grab the DOM elements we need
const categorySelect = document.getElementById('category-select');
const voiceOptions = document.getElementById('voice-options');
const textInput = document.getElementById('text-input');
const speedRatio = document.getElementById('speed-ratio');
const volumeRatio = document.getElementById('volume-ratio');
const pitchRatio = document.getElementById('pitch-ratio');
const generateBtn = document.getElementById('generate-btn');
const loading = document.getElementById('loading');
const audioResult = document.getElementById('audio-result');
const audioPlayer = document.getElementById('audio-player');
const downloadBtn = document.getElementById('download-btn');
const emptyResult = document.getElementById('empty-result');
const errorMessage = document.getElementById('error-message');
const successMessage = document.getElementById('success-message');
// Currently selected voice
let selectedVoiceType = null;
// API base URL for the TTS routes (the backend mounts them under /api/tts)
const apiBaseUrl = '/api/tts';
// Load all voice categories
async function loadVoiceCategories() {
try {
const response = await fetch(`${apiBaseUrl}/voices/categories`);
const data = await response.json();
if (data.success) {
// Clear existing options
categorySelect.innerHTML = '<option value="">请选择音色分类</option>';
// Add an option per category
data.data.forEach(category => {
const option = document.createElement('option');
option.value = category;
option.textContent = category;
categorySelect.appendChild(option);
});
} else {
showError('获取音色分类失败: ' + data.message);
}
} catch (error) {
showError('获取音色分类失败: ' + error.message);
}
}
// Load the voices for a given category
async function loadVoicesByCategory(category) {
try {
const response = await fetch(`${apiBaseUrl}/voices/by-category/${category}`);
const data = await response.json();
if (data.success) {
// Clear existing voice options
voiceOptions.innerHTML = '';
// Basic type check on the returned data
if (typeof data.data !== 'object' || data.data === null) {
showError('获取的音色列表格式不正确');
return;
}
// Convert the object into an array: [{voice_type, name, description}, ...]
const voicesArray = Object.entries(data.data).map(([voiceType, description]) => {
// Extract the name and description (the catalogue uses the "名称(说明)" format with full-width parentheses)
const match = description.match(/^(.*?)[（(](.*?)[）)]$/);
return {
voice_type: voiceType,
name: match ? match[1] : description,
description: match ? match[2] : '无描述'
};
});
// Check whether the array is empty
if (voicesArray.length === 0) {
voiceOptions.innerHTML = '<p>该分类下没有可用音色</p>';
return;
}
// Render a card for each voice
voicesArray.forEach(voice => {
const voiceCard = document.createElement('div');
voiceCard.className = 'voice-card';
voiceCard.dataset.voiceType = voice.voice_type;
const voiceName = document.createElement('div');
voiceName.className = 'voice-name';
voiceName.textContent = voice.name;
const voiceDescription = document.createElement('div');
voiceDescription.className = 'voice-description';
voiceDescription.textContent = voice.description || '暂无描述';
voiceCard.appendChild(voiceName);
voiceCard.appendChild(voiceDescription);
// Click handler: select this voice
voiceCard.addEventListener('click', function() {
// Clear the selected state on the other cards
document.querySelectorAll('.voice-card').forEach(card => {
card.classList.remove('selected');
});
// Mark this card as selected
this.classList.add('selected');
// Remember the selected voice type
selectedVoiceType = this.dataset.voiceType;
});
voiceOptions.appendChild(voiceCard);
});
} else {
voiceOptions.innerHTML = '<p>获取音色列表失败: ' + data.message + '</p>';
}
} catch (error) {
voiceOptions.innerHTML = '<p>获取音色列表失败: ' + error.message + '</p>';
}
}
// Generate speech
async function generateAudio() {
// Validate input
if (!selectedVoiceType) {
showError('请选择音色');
return;
}
if (!textInput.value.trim()) {
showError('请输入要转换的文本');
return;
}
// Hide previous messages
hideMessages();
// Show the loading state
loading.classList.add('active');
audioResult.style.display = 'none';
emptyResult.style.display = 'none';
generateBtn.disabled = true;
try {
// Build the request payload
const requestData = {
text: textInput.value.trim(),
voice_type: selectedVoiceType,
speed_ratio: parseFloat(speedRatio.value),
volume_ratio: parseFloat(volumeRatio.value),
pitch_ratio: parseFloat(pitchRatio.value),
encoding: 'mp3'
};
// Send the request to the TTS generate endpoint
const response = await fetch(`${apiBaseUrl}/generate`, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(requestData)
});
const data = await response.json();
if (data.success) {
// Show a success message
showSuccess('语音生成成功');
// Wire up the audio player
audioPlayer.src = data.audio_url;
audioResult.style.display = 'block';
// Wire up the download button
downloadBtn.onclick = function() {
const a = document.createElement('a');
a.href = data.audio_url;
a.download = 'tts_audio.mp3';
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
};
} else {
showError('语音生成失败: ' + data.message);
emptyResult.style.display = 'block';
}
} catch (error) {
showError('语音生成失败: ' + error.message);
emptyResult.style.display = 'block';
} finally {
// Hide the loading state
loading.classList.remove('active');
generateBtn.disabled = false;
}
}
// Show an error message
function showError(message) {
errorMessage.textContent = message;
errorMessage.style.display = 'block';
successMessage.style.display = 'none';
}
// Show a success message
function showSuccess(message) {
successMessage.textContent = message;
successMessage.style.display = 'block';
errorMessage.style.display = 'none';
}
// Hide all messages
function hideMessages() {
errorMessage.style.display = 'none';
successMessage.style.display = 'none';
}
// Event listeners
categorySelect.addEventListener('change', function() {
const category = this.value;
if (category) {
loadVoicesByCategory(category);
} else {
voiceOptions.innerHTML = '<p>请先选择音色分类</p>';
selectedVoiceType = null;
}
});
generateBtn.addEventListener('click', generateAudio);
// Initial load
loadVoiceCategories();
});
</script>
</body>
</html>