You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

161 lines
4.9 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import json
import asyncio
import time
from core.providers.tts.dto.dto import SentenceType
from core.utils.util import get_string_no_punctuation_or_emoji, analyze_emotion
from loguru import logger
TAG = __name__
emoji_map = {
"neutral": "😶",
"happy": "🙂",
"laughing": "😆",
"funny": "😂",
"sad": "😔",
"angry": "😠",
"crying": "😭",
"loving": "😍",
"embarrassed": "😳",
"surprised": "😲",
"shocked": "😱",
"thinking": "🤔",
"winking": "😉",
"cool": "😎",
"relaxed": "😌",
"delicious": "🤤",
"kissy": "😘",
"confident": "😏",
"sleepy": "😴",
"silly": "😜",
"confused": "🙄",
}
async def sendAudioMessage(conn, sentenceType, audios, text):
# 发送句子开始消息
conn.logger.bind(tag=TAG).info(f"发送音频消息: {sentenceType}, {text}")
if text is not None:
emotion = analyze_emotion(text)
emoji = emoji_map.get(emotion, "🙂") # 默认使用笑脸
await conn.websocket.send(
json.dumps(
{
"type": "llm",
"text": emoji,
"emotion": emotion,
"session_id": conn.session_id,
}
)
)
pre_buffer = False
if conn.tts.tts_audio_first_sentence and text is not None:
conn.logger.bind(tag=TAG).info(f"发送第一段语音: {text}")
conn.tts.tts_audio_first_sentence = False
pre_buffer = True
await send_tts_message(conn, "sentence_start", text)
await sendAudio(conn, audios, pre_buffer)
await send_tts_message(conn, "sentence_end", text)
# 发送结束消息(如果是最后一个文本)
if conn.llm_finish_task and sentenceType == SentenceType.LAST:
await send_tts_message(conn, "stop", None)
conn.client_is_speaking = False
if conn.close_after_chat:
await conn.close()
# 播放音频
async def sendAudio(conn, audios, pre_buffer=True):
if audios is None or len(audios) == 0:
return
# 流控参数优化
frame_duration = 60 # 帧时长(毫秒),匹配 Opus 编码
start_time = time.perf_counter()
play_position = 0
# 仅当第一句话时执行预缓冲
if pre_buffer:
pre_buffer_frames = min(3, len(audios))
for i in range(pre_buffer_frames):
await conn.websocket.send(audios[i])
remaining_audios = audios[pre_buffer_frames:]
else:
remaining_audios = audios
# 播放剩余音频帧
for opus_packet in remaining_audios:
if conn.client_abort:
break
# 重置没有声音的状态
conn.last_activity_time = time.time() * 1000
# 计算预期发送时间
expected_time = start_time + (play_position / 1000)
current_time = time.perf_counter()
delay = expected_time - current_time
if delay > 0:
await asyncio.sleep(delay)
await conn.websocket.send(opus_packet)
play_position += frame_duration
async def send_tts_message(conn, state, text=None):
"""发送 TTS 状态消息"""
message = {"type": "tts", "state": state, "session_id": conn.session_id}
if text is not None:
message["text"] = text
# TTS播放结束
if state == "stop":
# 播放提示音
tts_notify = conn.config.get("enable_stop_tts_notify", False)
if tts_notify:
stop_tts_notify_voice = conn.config.get(
"stop_tts_notify_voice", "config/assets/tts_notify.mp3"
)
audios, _ = conn.tts.audio_to_opus_data(stop_tts_notify_voice)
await sendAudio(conn, audios)
# 清除服务端讲话状态
conn.clearSpeakStatus()
# 发送消息到客户端
await conn.websocket.send(json.dumps(message))
async def send_stt_message(conn, text):
end_prompt_str = conn.config.get("end_prompt", {}).get("prompt")
if end_prompt_str and end_prompt_str == text:
await send_tts_message(conn, "start")
return
"""发送 STT 状态消息"""
# 解析JSON格式提取实际的用户说话内容
display_text = text
try:
# 尝试解析JSON格式
if text.strip().startswith('{') and text.strip().endswith('}'):
parsed_data = json.loads(text)
if isinstance(parsed_data, dict) and "content" in parsed_data:
# 如果是包含说话人信息的JSON格式只显示content部分
display_text = parsed_data["content"]
except (json.JSONDecodeError, TypeError):
# 如果不是JSON格式直接使用原始文本
display_text = text
stt_text = get_string_no_punctuation_or_emoji(display_text)
await conn.websocket.send(
json.dumps({"type": "stt", "text": stt_text, "session_id": conn.session_id})
)
conn.client_is_speaking = True
await send_tts_message(conn, "start")