def stream_and_split_text(prompt, chunks=None):
    """
    Stream the LLM reply for a prompt and yield it one sentence at a time.

    A sentence ends at a run of terminators: the ideographic full stop,
    the fullwidth exclamation/question marks, or ASCII ``.``, ``!``, ``?``.
    Consecutive terminators found together in the buffer ("?!", "...")
    stay attached to the sentence they close. Fragments that contain
    nothing but terminators and/or whitespace are dropped, because they
    carry no text worth synthesizing.

    @param prompt: prompt text sent to the LLM
    @param chunks: optional iterable of text fragments to split instead of
                   querying the LLM; when None the fragments come from
                   get_llm_response(prompt, stream=True)
    @return: generator yielding each sentence as soon as its terminator has
             arrived, and finally any unterminated remainder
    """
    # Written with escapes so the fullwidth marks cannot be lost to
    # character normalization: U+3002, U+FF01, U+FF1F plus the ASCII forms.
    sentence_end = re.compile(r'[\u3002\uff01\uff1f.!?]+')

    if chunks is None:
        chunks = get_llm_response(prompt, stream=True)

    buffer = ""
    for content in chunks:
        # Guard against empty / None deltas, which would break `+=`.
        if not content:
            continue
        buffer += content

        consumed = 0
        for match in sentence_end.finditer(buffer):
            # Only emit when there is real text before the terminator run.
            if buffer[consumed:match.start()].strip():
                yield buffer[consumed:match.end()]
            consumed = match.end()

        # Keep the unfinished tail for the next fragment.
        buffer = buffer[consumed:]

    # Flush whatever is left once the stream ends.
    if buffer.strip():
        yield buffer
"wss://openspeech.bytedance.com/api/v3/tts/unidirectional/stream" + self.audio_queue = Queue() + self.max_concurrency = max_concurrency # 最大并发数 + self.semaphore = asyncio.Semaphore(max_concurrency) # 并发控制信号量 + + @staticmethod + def get_resource_id(voice: str) -> str: + if voice.startswith("S_"): + return "volc.megatts.default" + return "volc.service_type.10029" + + async def synthesize_stream(self, text_stream, audio_callback): + """ + 流式合成语音 + + Args: + text_stream: 文本流生成器 + audio_callback: 音频数据回调函数,接收音频片段 + """ + # 为每个文本片段创建一个WebSocket连接,但限制并发数 + tasks = [] + for text in text_stream: + if text.strip(): # 忽略空文本 + task = asyncio.create_task(self._synthesize_single_with_semaphore(text, audio_callback)) + tasks.append(task) + + # 等待所有任务完成 + await asyncio.gather(*tasks) + + async def _synthesize_single_with_semaphore(self, text, audio_callback): + """使用信号量控制并发数的单个文本合成""" + async with self.semaphore: # 获取信号量,限制并发数 + await self._synthesize_single(text, audio_callback) + + async def _synthesize_single(self, text, audio_callback): + """合成单个文本片段""" + headers = { + "X-Api-App-Key": self.app_key, + "X-Api-Access-Key": self.access_token, + "X-Api-Resource-Id": self.get_resource_id(self.voice_type), + "X-Api-Connect-Id": str(uuid.uuid4()), + } + + websocket = await websockets.connect( + self.endpoint, additional_headers=headers, max_size=10 * 1024 * 1024 + ) + + try: + request = { + "user": { + "uid": str(uuid.uuid4()), + }, + "req_params": { + "speaker": self.voice_type, + "audio_params": { + "format": self.encoding, + "sample_rate": 24000, + "enable_timestamp": True, + }, + "text": text, + "additions": json.dumps({"disable_markdown_filter": False}), + }, + } + + # 发送请求 + await full_client_request(websocket, json.dumps(request).encode()) + + # 接收音频数据 + audio_data = bytearray() + while True: + msg = await receive_message(websocket) + + if msg.type == MsgType.FullServerResponse: + if msg.event == EventType.SessionFinished: + break + elif msg.type == MsgType.AudioOnlyServer: + 
audio_data.extend(msg.payload) + else: + raise RuntimeError(f"TTS conversion failed: {msg}") + + # 通过回调函数返回音频数据 + if audio_data: + audio_callback(audio_data) + + finally: + await websocket.close() + + +def audio_callback(audio_data): + """ + 音频数据回调函数,将音频数据保存到文件 + """ + # 创建输出目录 + output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "output") + os.makedirs(output_dir, exist_ok=True) + + # 生成文件名 + filename = f"streaming_tts_{uuid.uuid4().hex[:8]}.wav" + filepath = os.path.join(output_dir, filename) + + # 保存音频文件 + with open(filepath, "wb") as f: + f.write(audio_data) + + print(f"音频片段已保存到: {filepath}") + + +async def test_streaming_tts(): + """ + 测试流式TTS功能 + """ + # 创建TTS实例 + tts = StreamingVolcanoTTS() + + # 准备测试文本流 + test_texts = [ + "你好,我是火山引擎的语音合成服务。", + "这是一个流式语音合成的测试。", + "我们将文本分成多个片段进行合成。", + "这样可以减少等待时间,提高用户体验。" + ] + + print("开始测试流式TTS...") + print(f"测试文本: {test_texts}") + + # 调用流式合成 + await tts.synthesize_stream(test_texts, audio_callback) + + print("流式TTS测试完成!") + + +def main(): + """ + 主函数,运行测试 + """ + asyncio.run(test_streaming_tts()) + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/dsLightRag/Test/TTS/T3_TTS_Pipeline.py b/dsLightRag/Test/TTS/T3_TTS_Pipeline.py new file mode 100644 index 00000000..d8252847 --- /dev/null +++ b/dsLightRag/Test/TTS/T3_TTS_Pipeline.py @@ -0,0 +1,88 @@ +import asyncio +import os +import sys +import uuid + +# 添加路径以导入其他模块 +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from T1_LLM import stream_and_split_text +from T2_StreamingVolanoTTS import StreamingVolcanoTTS + + +async def streaming_tts_pipeline(prompt, audio_callback): + """ + 流式TTS管道:获取LLM流式输出并断句,然后使用TTS合成语音 + + Args: + prompt: 提示文本 + audio_callback: 音频数据回调函数 + """ + # 1. 获取LLM流式输出并断句 + text_stream = stream_and_split_text(prompt) + + # 2. 初始化TTS处理器 + tts = StreamingVolcanoTTS() + + # 3. 
def save_audio_callback(output_dir=None, prefix="pipeline_tts", extension="wav"):
    """
    Build an audio callback that writes every received buffer to its own file.

    Files are named ``{prefix}_{sequence}_{random}.{extension}``. The
    zero-padded sequence number counts the calls made to the returned
    callback, so sorting the file names restores the order in which the
    segments arrived; the random part keeps names from separate callbacks
    writing to the same directory apart.

    Args:
        output_dir: target directory; defaults to an "output" folder next to
            this file. It is created if it does not exist.
        prefix: leading part of every file name
        extension: file extension, without the dot

    Returns:
        A function taking one bytes-like audio buffer and saving it.
    """
    if output_dir is None:
        output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "output")

    # Make sure the target directory exists before the first write.
    os.makedirs(output_dir, exist_ok=True)

    sequence = 0

    def callback(audio_data):
        nonlocal sequence
        sequence += 1

        filename = f"{prefix}_{sequence:04d}_{uuid.uuid4().hex[:8]}.{extension}"
        filepath = os.path.join(output_dir, filename)

        with open(filepath, "wb") as f:
            f.write(audio_data)

        print(f"音频片段已保存到: {filepath} ({len(audio_data)} 字节)")

    return callback
diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_0887bd4e.wav b/dsLightRag/Test/TTS/output/pipeline_tts_0887bd4e.wav new file mode 100644 index 00000000..1f304a39 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_0887bd4e.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_213e1a10.wav b/dsLightRag/Test/TTS/output/pipeline_tts_213e1a10.wav new file mode 100644 index 00000000..ce1fafd9 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_213e1a10.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_25be617a.wav b/dsLightRag/Test/TTS/output/pipeline_tts_25be617a.wav new file mode 100644 index 00000000..30a982c7 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_25be617a.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_2811e355.wav b/dsLightRag/Test/TTS/output/pipeline_tts_2811e355.wav new file mode 100644 index 00000000..d242ec74 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_2811e355.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_2fdd5f98.wav b/dsLightRag/Test/TTS/output/pipeline_tts_2fdd5f98.wav new file mode 100644 index 00000000..87f31433 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_2fdd5f98.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_333cf329.wav b/dsLightRag/Test/TTS/output/pipeline_tts_333cf329.wav new file mode 100644 index 00000000..bcda8853 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_333cf329.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_36f27419.wav b/dsLightRag/Test/TTS/output/pipeline_tts_36f27419.wav new file mode 100644 index 00000000..aa0e2595 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_36f27419.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_446a5fae.wav b/dsLightRag/Test/TTS/output/pipeline_tts_446a5fae.wav new file mode 100644 index 00000000..79ffba21 Binary files /dev/null and 
b/dsLightRag/Test/TTS/output/pipeline_tts_446a5fae.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_4fa08a23.wav b/dsLightRag/Test/TTS/output/pipeline_tts_4fa08a23.wav new file mode 100644 index 00000000..27ea0bb3 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_4fa08a23.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_53a256c0.wav b/dsLightRag/Test/TTS/output/pipeline_tts_53a256c0.wav new file mode 100644 index 00000000..b1550a78 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_53a256c0.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_58f66415.wav b/dsLightRag/Test/TTS/output/pipeline_tts_58f66415.wav new file mode 100644 index 00000000..9074d50f Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_58f66415.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_63d85d6a.wav b/dsLightRag/Test/TTS/output/pipeline_tts_63d85d6a.wav new file mode 100644 index 00000000..75baf3b1 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_63d85d6a.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_6a18ae85.wav b/dsLightRag/Test/TTS/output/pipeline_tts_6a18ae85.wav new file mode 100644 index 00000000..9dc31ded Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_6a18ae85.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_8a5da962.wav b/dsLightRag/Test/TTS/output/pipeline_tts_8a5da962.wav new file mode 100644 index 00000000..29be9a18 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_8a5da962.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_95186633.wav b/dsLightRag/Test/TTS/output/pipeline_tts_95186633.wav new file mode 100644 index 00000000..65df2b32 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_95186633.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_9c3dc95f.wav b/dsLightRag/Test/TTS/output/pipeline_tts_9c3dc95f.wav new file mode 100644 
index 00000000..197584cf Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_9c3dc95f.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_abbf997c.wav b/dsLightRag/Test/TTS/output/pipeline_tts_abbf997c.wav new file mode 100644 index 00000000..68212db3 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_abbf997c.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_b2427c95.wav b/dsLightRag/Test/TTS/output/pipeline_tts_b2427c95.wav new file mode 100644 index 00000000..712d7e01 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_b2427c95.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_b8560631.wav b/dsLightRag/Test/TTS/output/pipeline_tts_b8560631.wav new file mode 100644 index 00000000..833b8cdd Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_b8560631.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_c06050b3.wav b/dsLightRag/Test/TTS/output/pipeline_tts_c06050b3.wav new file mode 100644 index 00000000..22d16ce5 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_c06050b3.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_c64e1f40.wav b/dsLightRag/Test/TTS/output/pipeline_tts_c64e1f40.wav new file mode 100644 index 00000000..f112c7fa Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_c64e1f40.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_d1d0c0d0.wav b/dsLightRag/Test/TTS/output/pipeline_tts_d1d0c0d0.wav new file mode 100644 index 00000000..866333a8 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_d1d0c0d0.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_d784b508.wav b/dsLightRag/Test/TTS/output/pipeline_tts_d784b508.wav new file mode 100644 index 00000000..3f075523 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_d784b508.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_ecf9de10.wav 
b/dsLightRag/Test/TTS/output/pipeline_tts_ecf9de10.wav new file mode 100644 index 00000000..9e738ad4 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_ecf9de10.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_f059331d.wav b/dsLightRag/Test/TTS/output/pipeline_tts_f059331d.wav new file mode 100644 index 00000000..a5cd52d2 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_f059331d.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_f4de5e7b.wav b/dsLightRag/Test/TTS/output/pipeline_tts_f4de5e7b.wav new file mode 100644 index 00000000..70706b85 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_f4de5e7b.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_f778037b.wav b/dsLightRag/Test/TTS/output/pipeline_tts_f778037b.wav new file mode 100644 index 00000000..8de0b523 Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_f778037b.wav differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_fd9c5d9e.wav b/dsLightRag/Test/TTS/output/pipeline_tts_fd9c5d9e.wav new file mode 100644 index 00000000..5aae6efb Binary files /dev/null and b/dsLightRag/Test/TTS/output/pipeline_tts_fd9c5d9e.wav differ diff --git a/dsLightRag/Test/Test_TTS.py b/dsLightRag/Test/Test_TTS.py deleted file mode 100644 index 8f93c529..00000000 --- a/dsLightRag/Test/Test_TTS.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python3 -import json -import logging -import uuid -# pip install websockets -import websockets - -from Config import Config -from Util.TTS_Protocols import EventType, MsgType, full_client_request, receive_message - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -def get_resource_id(voice: str) -> str: - if voice.startswith("S_"): - return "volc.megatts.default" - return "volc.service_type.10029" - - -text = '你好,我是火山引擎的语音合成服务。这是一个美好的旅程。' # 文本 -voice_type = 'zh_female_wanwanxiaohe_moon_bigtts' # 湾湾小何 -encoding = 'wav' # 输出文件编码 -endpoint = 
"wss://openspeech.bytedance.com/api/v3/tts/unidirectional/stream" # WebSocket端点URL - - -async def main(): - # Connect to server - headers = { - "X-Api-App-Key": Config.HS_APP_ID, - "X-Api-Access-Key": Config.HS_ACCESS_TOKEN, - "X-Api-Resource-Id": get_resource_id(voice_type), - "X-Api-Connect-Id": str(uuid.uuid4()), - } - - logger.info(f"Connecting to {endpoint} with headers: {headers}") - websocket = await websockets.connect( - endpoint, additional_headers=headers, max_size=10 * 1024 * 1024 - ) - logger.info( - f"Connected to WebSocket server, Logid: {websocket.response.headers['x-tt-logid']}", - ) - - try: - # Prepare request payload - request = { - "user": { - "uid": str(uuid.uuid4()), - }, - "req_params": { - "speaker": voice_type, - "audio_params": { - "format": encoding, - "sample_rate": 24000, - "enable_timestamp": True, - }, - "text": text, - "additions": json.dumps( - { - "disable_markdown_filter": False, - } - ), - }, - } - - # Send request - await full_client_request(websocket, json.dumps(request).encode()) - - # Receive audio data - audio_data = bytearray() - while True: - msg = await receive_message(websocket) - - if msg.type == MsgType.FullServerResponse: - if msg.event == EventType.SessionFinished: - break - elif msg.type == MsgType.AudioOnlyServer: - audio_data.extend(msg.payload) - else: - raise RuntimeError(f"TTS conversion failed: {msg}") - - # Check if we received any audio data - if not audio_data: - raise RuntimeError("No audio data received") - - # Save audio file - filename = f"{voice_type}.{encoding}" - with open(filename, "wb") as f: - f.write(audio_data) - logger.info(f"Audio received: {len(audio_data)}, saved to {filename}") - - finally: - await websocket.close() - logger.info("Connection closed") - - -if __name__ == "__main__": - import asyncio - - asyncio.run(main()) diff --git a/dsLightRag/Test/zh_female_wanwanxiaohe_moon_bigtts.wav b/dsLightRag/Test/zh_female_wanwanxiaohe_moon_bigtts.wav deleted file mode 100644 index 
bad124bc..00000000 Binary files a/dsLightRag/Test/zh_female_wanwanxiaohe_moon_bigtts.wav and /dev/null differ