import asyncio
import json
import time

from core.handle.sendAudioHandle import send_stt_message
from core.handle.intentHandler import handle_user_intent
from core.utils.output_counter import check_device_output_limit
from core.handle.abortHandle import handleAbortMessage
from core.handle.sendAudioHandle import SentenceType
from core.utils.util import audio_to_data

TAG = __name__


async def handleAudioMessage(conn, audio):
    """Process one incoming audio chunk for a connection.

    Runs VAD on the chunk, suppresses detection for a short grace period
    right after wake-up, aborts ongoing client playback when the user speaks,
    performs the idle-timeout check and finally forwards the chunk to ASR.

    Args:
        conn: Connection object (project type) carrying ``vad``, ``asr``,
            ``asr_audio``, ``client_is_speaking`` and related state.
        audio: The raw audio chunk received from the device.
    """
    # Whether someone is speaking in the current chunk.
    have_voice = conn.vad.is_vad(conn, audio)

    # If the device has just been woken up, ignore VAD results for a moment.
    if have_voice and getattr(conn, "just_woken_up", False):
        conn.asr_audio.clear()
        # Schedule re-enabling of VAD after a short delay, unless such a task
        # is already pending. A missing or None task counts as "not pending".
        resume_task = getattr(conn, "vad_resume_task", None)
        if resume_task is None or resume_task.done():
            conn.vad_resume_task = asyncio.create_task(resume_vad_detection(conn))
        return

    # The user started talking while the client is still playing: interrupt.
    if have_voice and conn.client_is_speaking:
        await handleAbortMessage(conn)

    # Long-idle detection, used to say goodbye and close the connection.
    await no_voice_close_connect(conn, have_voice)
    # Hand the audio over to ASR.
    await conn.asr.receive_audio(conn, audio, have_voice)


async def resume_vad_detection(conn):
    """Clear ``conn.just_woken_up`` after a 1-second grace period."""
    # Wait 1 second, then let VAD results through again.
    await asyncio.sleep(1)
    conn.just_woken_up = False


async def startToChat(conn, text):
    """Start a chat turn for recognized (or synthetic) user text.

    If ``text`` is a JSON object with ``speaker`` and ``content`` keys, the
    speaker name is stored on ``conn.current_speaker``; otherwise it is reset
    to ``None``. The text forwarded downstream is always the original
    ``text`` (the JSON is intentionally passed through unparsed).

    The turn is short-circuited when the device still needs binding, when the
    daily output limit is exceeded, or when intent handling consumes the
    input.

    Args:
        conn: Connection object (project type).
        text: Plain text, or a JSON string carrying speaker information.
    """
    speaker_name = None
    # The raw text (even when it is JSON) is what gets forwarded downstream.
    actual_text = text

    try:
        # Only attempt JSON parsing for input that looks like a JSON object.
        stripped = text.strip()
        if stripped.startswith("{") and stripped.endswith("}"):
            data = json.loads(text)
            if "speaker" in data and "content" in data:
                speaker_name = data["speaker"]
                conn.logger.bind(tag=TAG).info(f"解析到说话人信息: {speaker_name}")
    except (json.JSONDecodeError, KeyError):
        # Not valid JSON after all: keep treating the input as plain text.
        pass

    # Remember who is speaking (None when unknown or empty).
    conn.current_speaker = speaker_name if speaker_name else None

    if conn.need_bind:
        await check_bind_device(conn)
        return

    # Stop if today's output size already exceeds the configured limit.
    if conn.max_output_size > 0 and check_device_output_limit(
        conn.headers.get("device-id"), conn.max_output_size
    ):
        await max_out_size(conn)
        return

    if conn.client_is_speaking:
        await handleAbortMessage(conn)

    # Intent analysis comes first; if it handled the input, skip the chat.
    intent_handled = await handle_user_intent(conn, actual_text)
    if intent_handled:
        return

    # Intent not handled: continue with the regular chat flow.
    await send_stt_message(conn, actual_text)
    conn.executor.submit(conn.chat, actual_text)


async def no_voice_close_connect(conn, have_voice):
    """Track voice activity and wind the conversation down after long silence.

    When voice is present the activity timestamp (milliseconds) is refreshed.
    Otherwise, once the silence exceeds ``close_connection_no_voice_time``
    seconds (default 120), the connection is flagged to close after the chat
    and either closed immediately (end prompt disabled) or a farewell prompt
    is sent through ``startToChat``.

    Args:
        conn: Connection object (project type).
        have_voice: Whether the current audio chunk contains speech.
    """
    if have_voice:
        conn.last_activity_time = time.time() * 1000
        return

    # Only check for a timeout once the timestamp has been initialised.
    if not conn.last_activity_time > 0.0:
        return

    no_voice_time = time.time() * 1000 - conn.last_activity_time
    close_connection_no_voice_time = int(
        conn.config.get("close_connection_no_voice_time", 120)
    )
    if conn.close_after_chat or no_voice_time <= 1000 * close_connection_no_voice_time:
        return

    conn.close_after_chat = True
    conn.client_abort = False

    # ``or {}`` also covers a config that sets "end_prompt" to null, which
    # would otherwise crash on ``.get`` below.
    end_prompt = conn.config.get("end_prompt") or {}
    if end_prompt.get("enable", True) is False:
        conn.logger.bind(tag=TAG).info("结束对话,无需发送结束提示语")
        await conn.close()
        return

    prompt = end_prompt.get("prompt")
    if not prompt:
        # NOTE(review): "未来头" looks like a typo for "为开头"; left unchanged
        # because this string is sent to the model at runtime — confirm.
        prompt = "请你以```时间过得真快```未来头,用富有感情、依依不舍的话来结束这场对话吧。!"
    await startToChat(conn, prompt)


async def max_out_size(conn):
    """Tell the user the daily output limit was reached and end the chat.

    Sends the notice as an STT message, queues the pre-recorded audio as the
    last TTS sentence and flags the connection to close after the chat.
    """
    text = "不好意思,我现在有点事情要忙,明天这个时候我们再聊,约好了哦!明天不见不散,拜拜!"
    await send_stt_message(conn, text)
    file_path = "config/assets/max_output_size.wav"
    opus_packets, _ = audio_to_data(file_path)
    conn.tts.tts_audio_queue.put((SentenceType.LAST, opus_packets, text))
    conn.close_after_chat = True


async def check_bind_device(conn):
    """Prompt the user to bind the device, reading the bind code aloud.

    With a valid 6-character ``conn.bind_code`` the instruction text is sent,
    a prompt sound is queued, then one audio clip per code character, then an
    empty LAST marker. A code of any other length only produces an error
    message. Without a bind code, a "version info not found" notice is sent
    and its audio queued.
    """
    if not conn.bind_code:
        text = "没有找到该设备的版本信息,请正确配置 OTA地址,然后重新编译固件。"
        await send_stt_message(conn, text)
        music_path = "config/assets/bind_not_found.wav"
        opus_packets, _ = audio_to_data(music_path)
        conn.tts.tts_audio_queue.put((SentenceType.LAST, opus_packets, text))
        return

    # The bind code must be exactly 6 characters long.
    if len(conn.bind_code) != 6:
        conn.logger.bind(tag=TAG).error(f"无效的绑定码格式: {conn.bind_code}")
        text = "绑定码格式错误,请检查配置。"
        await send_stt_message(conn, text)
        return

    text = f"请登录控制面板,输入{conn.bind_code},绑定设备。"
    await send_stt_message(conn, text)

    # Play the prompt sound first.
    music_path = "config/assets/bind_code.wav"
    opus_packets, _ = audio_to_data(music_path)
    conn.tts.tts_audio_queue.put((SentenceType.FIRST, opus_packets, text))

    # Play the code one character at a time (length was validated above).
    for digit in conn.bind_code:
        try:
            num_path = f"config/assets/bind_code/{digit}.wav"
            num_packets, _ = audio_to_data(num_path)
            conn.tts.tts_audio_queue.put((SentenceType.MIDDLE, num_packets, None))
        except Exception as e:
            # Best effort: a missing/unreadable clip must not stop the rest.
            conn.logger.bind(tag=TAG).error(f"播放数字音频失败: {e}")
            continue
    conn.tts.tts_audio_queue.put((SentenceType.LAST, [], None))