diff --git a/dsLightRag/Test/TTS/T1_LLM.py b/dsLightRag/Test/TTS/T1_LLM.py deleted file mode 100644 index 4cbf330e..00000000 --- a/dsLightRag/Test/TTS/T1_LLM.py +++ /dev/null @@ -1,47 +0,0 @@ -import re -import sys -import os -sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -from Util.LlmUtil import get_llm_response - -def stream_and_split_text(prompt): - """ - 流式获取LLM输出并按句子分割 - @param prompt: 提示文本 - @return: 生成器,每次产生一个完整句子 - """ - buffer = "" - - # 使用LlmUtil中的get_llm_response函数获取流式响应 - for content in get_llm_response(prompt, stream=True): - buffer += content - - # 使用正则表达式检测句子结束 - sentences = re.split(r'([。!?.!?])', buffer) - if len(sentences) > 1: - # 提取完整句子 - for i in range(0, len(sentences)-1, 2): - if i+1 < len(sentences): - sentence = sentences[i] + sentences[i+1] - yield sentence - - # 保留不完整的部分 - buffer = sentences[-1] - - # 处理最后剩余的部分 - if buffer: - yield buffer - -def main(): - """ - 测试stream_and_split_text函数 - """ - test_prompt = "请简单介绍一下人工智能的发展历史。包括从图灵测试到深度学习的演进过程。" - print("测试文本:", test_prompt) - print("\n分割后的句子:") - - for i, sentence in enumerate(stream_and_split_text(test_prompt), 1): - print(f"句子 {i}: {sentence}") - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/dsLightRag/Test/TTS/T2_StreamingVolanoTTS.py b/dsLightRag/Test/TTS/T2_StreamingVolanoTTS.py deleted file mode 100644 index b251b99c..00000000 --- a/dsLightRag/Test/TTS/T2_StreamingVolanoTTS.py +++ /dev/null @@ -1,156 +0,0 @@ -import asyncio -import json -import uuid -import websockets -import os -from queue import Queue -from Config import Config -from Util.TTS_Protocols import full_client_request, receive_message, MsgType, EventType - - -class StreamingVolcanoTTS: - def __init__(self, voice_type='zh_female_wanwanxiaohe_moon_bigtts', encoding='wav', max_concurrency=2): - self.voice_type = voice_type - self.encoding = encoding - self.app_key = Config.HS_APP_ID - self.access_token = Config.HS_ACCESS_TOKEN - self.endpoint = "wss://openspeech.bytedance.com/api/v3/tts/unidirectional/stream" - self.audio_queue = Queue() - self.max_concurrency = max_concurrency # 最大并发数 - self.semaphore = asyncio.Semaphore(max_concurrency) # 并发控制信号量 - - @staticmethod - def get_resource_id(voice: str) -> str: - if voice.startswith("S_"): - return "volc.megatts.default" - return "volc.service_type.10029" - - async def synthesize_stream(self, text_stream, audio_callback): - """ - 流式合成语音 - - Args: - text_stream: 文本流生成器 - audio_callback: 音频数据回调函数,接收音频片段 - """ - # 为每个文本片段创建一个WebSocket连接,但限制并发数 - tasks = [] - for text in text_stream: - if text.strip(): # 忽略空文本 - task = asyncio.create_task(self._synthesize_single_with_semaphore(text, audio_callback)) - tasks.append(task) - - # 等待所有任务完成 - await asyncio.gather(*tasks) - - async def _synthesize_single_with_semaphore(self, text, audio_callback): - """使用信号量控制并发数的单个文本合成""" - async with self.semaphore: # 获取信号量,限制并发数 - await self._synthesize_single(text, audio_callback) - - async def _synthesize_single(self, text, audio_callback): - """合成单个文本片段""" - headers = { - "X-Api-App-Key": self.app_key, - "X-Api-Access-Key": self.access_token, - "X-Api-Resource-Id": self.get_resource_id(self.voice_type), - "X-Api-Connect-Id": str(uuid.uuid4()), - } - - websocket = await websockets.connect( - self.endpoint, additional_headers=headers, max_size=10 * 1024 * 1024 - ) - - try: - request = { - "user": { - "uid": str(uuid.uuid4()), - }, - "req_params": { - "speaker": self.voice_type, - "audio_params": { - "format": self.encoding, - "sample_rate": 24000, - "enable_timestamp": True, - }, - "text": text, - "additions": json.dumps({"disable_markdown_filter": False}), - }, - } - - # 发送请求 - await full_client_request(websocket, json.dumps(request).encode()) - - # 接收音频数据 - audio_data = bytearray() - while True: - msg = await receive_message(websocket) - - if msg.type == MsgType.FullServerResponse: - if msg.event == EventType.SessionFinished: - break - elif msg.type == MsgType.AudioOnlyServer: - audio_data.extend(msg.payload) - else: - raise RuntimeError(f"TTS conversion failed: {msg}") - - # 通过回调函数返回音频数据 - if audio_data: - audio_callback(audio_data) - - finally: - await websocket.close() - - -def audio_callback(audio_data): - """ - 音频数据回调函数,将音频数据保存到文件 - """ - # 创建输出目录 - output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "output") - os.makedirs(output_dir, exist_ok=True) - - # 生成文件名 - filename = f"streaming_tts_{uuid.uuid4().hex[:8]}.wav" - filepath = os.path.join(output_dir, filename) - - # 保存音频文件 - with open(filepath, "wb") as f: - f.write(audio_data) - - print(f"音频片段已保存到: {filepath}") - - -async def test_streaming_tts(): - """ - 测试流式TTS功能 - """ - # 创建TTS实例 - tts = StreamingVolcanoTTS() - - # 准备测试文本流 - test_texts = [ - "你好,我是火山引擎的语音合成服务。", - "这是一个流式语音合成的测试。", - "我们将文本分成多个片段进行合成。", - "这样可以减少等待时间,提高用户体验。" - ] - - print("开始测试流式TTS...") - print(f"测试文本: {test_texts}") - - # 调用流式合成 - await tts.synthesize_stream(test_texts, audio_callback) - - print("流式TTS测试完成!") - - -def main(): - """ - 主函数,运行测试 - """ - asyncio.run(test_streaming_tts()) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/dsLightRag/Test/TTS/T3_TTS_Pipeline.py b/dsLightRag/Test/TTS/T3_TTS_Pipeline.py index d8252847..8e096ed9 100644 --- a/dsLightRag/Test/TTS/T3_TTS_Pipeline.py +++ b/dsLightRag/Test/TTS/T3_TTS_Pipeline.py @@ -1,12 +1,137 @@ import asyncio +import json import os -import sys +import re import uuid +import websockets +from queue import Queue -# 添加路径以导入其他模块 -sys.path.append(os.path.dirname(os.path.abspath(__file__))) -from T1_LLM import stream_and_split_text -from T2_StreamingVolanoTTS import StreamingVolcanoTTS +from Config import Config +from Util.LlmUtil import get_llm_response +from Util.TTS_Protocols import full_client_request, receive_message, MsgType, EventType + + +def stream_and_split_text(prompt): + """ + 流式获取LLM输出并按句子分割 + @param prompt: 提示文本 + @return: 生成器,每次产生一个完整句子 + """ + buffer = "" + + # 使用LlmUtil中的get_llm_response函数获取流式响应 + for content in get_llm_response(prompt, stream=True): + buffer += content + + # 使用正则表达式检测句子结束 + sentences = re.split(r'([。!?.!?])', buffer) + if len(sentences) > 1: + # 提取完整句子 + for i in range(0, len(sentences)-1, 2): + if i+1 < len(sentences): + sentence = sentences[i] + sentences[i+1] + yield sentence + + # 保留不完整的部分 + buffer = sentences[-1] + + # 处理最后剩余的部分 + if buffer: + yield buffer + + +class StreamingVolcanoTTS: + def __init__(self, voice_type='zh_female_wanwanxiaohe_moon_bigtts', encoding='wav', max_concurrency=2): + self.voice_type = voice_type + self.encoding = encoding + self.app_key = Config.HS_APP_ID + self.access_token = Config.HS_ACCESS_TOKEN + self.endpoint = "wss://openspeech.bytedance.com/api/v3/tts/unidirectional/stream" + self.audio_queue = Queue() + self.max_concurrency = max_concurrency # 最大并发数 + self.semaphore = asyncio.Semaphore(max_concurrency) # 并发控制信号量 + + @staticmethod + def get_resource_id(voice: str) -> str: + if voice.startswith("S_"): + return "volc.megatts.default" + return "volc.service_type.10029" + + async def synthesize_stream(self, text_stream, audio_callback): + """ + 流式合成语音 + + Args: + text_stream: 文本流生成器 + audio_callback: 音频数据回调函数,接收音频片段 + """ + # 为每个文本片段创建一个WebSocket连接,但限制并发数 + tasks = [] + for text in text_stream: + if text.strip(): # 忽略空文本 + task = asyncio.create_task(self._synthesize_single_with_semaphore(text, audio_callback)) + tasks.append(task) + + # 等待所有任务完成 + await asyncio.gather(*tasks) + + async def _synthesize_single_with_semaphore(self, text, audio_callback): + """使用信号量控制并发数的单个文本合成""" + async with self.semaphore: # 获取信号量,限制并发数 + await self._synthesize_single(text, audio_callback) + + async def _synthesize_single(self, text, audio_callback): + """合成单个文本片段""" + headers = { + "X-Api-App-Key": self.app_key, + "X-Api-Access-Key": self.access_token, + "X-Api-Resource-Id": self.get_resource_id(self.voice_type), + "X-Api-Connect-Id": str(uuid.uuid4()), + } + + websocket = await websockets.connect( + self.endpoint, additional_headers=headers, max_size=10 * 1024 * 1024 + ) + + try: + request = { + "user": { + "uid": str(uuid.uuid4()), + }, + "req_params": { + "speaker": self.voice_type, + "audio_params": { + "format": self.encoding, + "sample_rate": 24000, + "enable_timestamp": True, + }, + "text": text, + "additions": json.dumps({"disable_markdown_filter": False}), + }, + } + + # 发送请求 + await full_client_request(websocket, json.dumps(request).encode()) + + # 接收音频数据 + audio_data = bytearray() + while True: + msg = await receive_message(websocket) + + if msg.type == MsgType.FullServerResponse: + if msg.event == EventType.SessionFinished: + break + elif msg.type == MsgType.AudioOnlyServer: + audio_data.extend(msg.payload) + else: + raise RuntimeError(f"TTS conversion failed: {msg}") + + # 通过回调函数返回音频数据 + if audio_data: + audio_callback(audio_data) + + finally: + await websocket.close() async def streaming_tts_pipeline(prompt, audio_callback): diff --git a/dsLightRag/Test/TTS/__pycache__/T1_LLM.cpython-310.pyc b/dsLightRag/Test/TTS/__pycache__/T1_LLM.cpython-310.pyc deleted file mode 100644 index cd571beb..00000000 Binary files a/dsLightRag/Test/TTS/__pycache__/T1_LLM.cpython-310.pyc and /dev/null differ diff --git a/dsLightRag/Test/TTS/__pycache__/T2_StreamingVolanoTTS.cpython-310.pyc b/dsLightRag/Test/TTS/__pycache__/T2_StreamingVolanoTTS.cpython-310.pyc deleted file mode 100644 index 63accfaf..00000000 Binary files a/dsLightRag/Test/TTS/__pycache__/T2_StreamingVolanoTTS.cpython-310.pyc and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_08730ffd.wav b/dsLightRag/Test/TTS/output/pipeline_tts_08730ffd.wav deleted file mode 100644 index 646191ca..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_08730ffd.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_0887bd4e.wav b/dsLightRag/Test/TTS/output/pipeline_tts_0887bd4e.wav deleted file mode 100644 index 1f304a39..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_0887bd4e.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_213e1a10.wav b/dsLightRag/Test/TTS/output/pipeline_tts_213e1a10.wav deleted file mode 100644 index ce1fafd9..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_213e1a10.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_25be617a.wav b/dsLightRag/Test/TTS/output/pipeline_tts_25be617a.wav deleted file mode 100644 index 30a982c7..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_25be617a.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_2811e355.wav b/dsLightRag/Test/TTS/output/pipeline_tts_2811e355.wav deleted file mode 100644 index d242ec74..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_2811e355.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_2fdd5f98.wav b/dsLightRag/Test/TTS/output/pipeline_tts_2fdd5f98.wav deleted file mode 100644 index 87f31433..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_2fdd5f98.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_333cf329.wav b/dsLightRag/Test/TTS/output/pipeline_tts_333cf329.wav deleted file mode 100644 index bcda8853..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_333cf329.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_36f27419.wav b/dsLightRag/Test/TTS/output/pipeline_tts_36f27419.wav deleted file mode 100644 index aa0e2595..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_36f27419.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_446a5fae.wav b/dsLightRag/Test/TTS/output/pipeline_tts_446a5fae.wav deleted file mode 100644 index 79ffba21..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_446a5fae.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_4fa08a23.wav b/dsLightRag/Test/TTS/output/pipeline_tts_4fa08a23.wav deleted file mode 100644 index 27ea0bb3..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_4fa08a23.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_53a256c0.wav b/dsLightRag/Test/TTS/output/pipeline_tts_53a256c0.wav deleted file mode 100644 index b1550a78..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_53a256c0.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_58f66415.wav b/dsLightRag/Test/TTS/output/pipeline_tts_58f66415.wav deleted file mode 100644 index 9074d50f..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_58f66415.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_63d85d6a.wav b/dsLightRag/Test/TTS/output/pipeline_tts_63d85d6a.wav deleted file mode 100644 index 75baf3b1..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_63d85d6a.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_6a18ae85.wav b/dsLightRag/Test/TTS/output/pipeline_tts_6a18ae85.wav deleted file mode 100644 index 9dc31ded..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_6a18ae85.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_8a5da962.wav b/dsLightRag/Test/TTS/output/pipeline_tts_8a5da962.wav deleted file mode 100644 index 29be9a18..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_8a5da962.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_95186633.wav b/dsLightRag/Test/TTS/output/pipeline_tts_95186633.wav deleted file mode 100644 index 65df2b32..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_95186633.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_9c3dc95f.wav b/dsLightRag/Test/TTS/output/pipeline_tts_9c3dc95f.wav deleted file mode 100644 index 197584cf..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_9c3dc95f.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_abbf997c.wav b/dsLightRag/Test/TTS/output/pipeline_tts_abbf997c.wav deleted file mode 100644 index 68212db3..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_abbf997c.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_b2427c95.wav b/dsLightRag/Test/TTS/output/pipeline_tts_b2427c95.wav deleted file mode 100644 index 712d7e01..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_b2427c95.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_b8560631.wav b/dsLightRag/Test/TTS/output/pipeline_tts_b8560631.wav deleted file mode 100644 index 833b8cdd..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_b8560631.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_c06050b3.wav b/dsLightRag/Test/TTS/output/pipeline_tts_c06050b3.wav deleted file mode 100644 index 22d16ce5..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_c06050b3.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_c64e1f40.wav b/dsLightRag/Test/TTS/output/pipeline_tts_c64e1f40.wav deleted file mode 100644 index f112c7fa..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_c64e1f40.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_d1d0c0d0.wav b/dsLightRag/Test/TTS/output/pipeline_tts_d1d0c0d0.wav deleted file mode 100644 index 866333a8..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_d1d0c0d0.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_d784b508.wav b/dsLightRag/Test/TTS/output/pipeline_tts_d784b508.wav deleted file mode 100644 index 3f075523..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_d784b508.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_ecf9de10.wav b/dsLightRag/Test/TTS/output/pipeline_tts_ecf9de10.wav deleted file mode 100644 index 9e738ad4..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_ecf9de10.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_f059331d.wav b/dsLightRag/Test/TTS/output/pipeline_tts_f059331d.wav deleted file mode 100644 index a5cd52d2..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_f059331d.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_f4de5e7b.wav b/dsLightRag/Test/TTS/output/pipeline_tts_f4de5e7b.wav deleted file mode 100644 index 70706b85..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_f4de5e7b.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_f778037b.wav b/dsLightRag/Test/TTS/output/pipeline_tts_f778037b.wav deleted file mode 100644 index 8de0b523..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_f778037b.wav and /dev/null differ diff --git a/dsLightRag/Test/TTS/output/pipeline_tts_fd9c5d9e.wav b/dsLightRag/Test/TTS/output/pipeline_tts_fd9c5d9e.wav deleted file mode 100644 index 5aae6efb..00000000 Binary files a/dsLightRag/Test/TTS/output/pipeline_tts_fd9c5d9e.wav and /dev/null differ