diff --git a/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/core/providers/asr/fun_local.py b/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/core/providers/asr/fun_local.py index 82d09ab5..cb4e94e2 100644 --- a/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/core/providers/asr/fun_local.py +++ b/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/core/providers/asr/fun_local.py @@ -64,7 +64,7 @@ class ASRProvider(ASRProviderBase): pcm_data.append(pcm_frame) except opuslib_next.OpusError as e: logger.bind(tag=TAG).error(f"Opus解码错误: {e}", exc_info=True) - + # 【黄海注释】:这里把用户输入的语音WAV保存成了文件 with wave.open(file_path, "wb") as wf: wf.setnchannels(1) wf.setsampwidth(2) # 2 bytes = 16-bit diff --git a/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/models/SenseVoiceSmall/demo.py b/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/models/SenseVoiceSmall/demo.py index 531e9798..aab54967 100644 --- a/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/models/SenseVoiceSmall/demo.py +++ b/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/models/SenseVoiceSmall/demo.py @@ -3,11 +3,13 @@ from funasr.utils.postprocess_utils import rich_transcription_postprocess model_dir = "./" - +# ASR部分用的阿里的开源funasr, 即fun-asr, github地址: +# https://github.com/modelscope/FunASR model = AutoModel( model=model_dir, vad_model="fsmn-vad", vad_kwargs={"max_single_segment_time": 30000}, + # 注释掉此句的话,可以使用CPU进行识别,不再使用GPU # device="cuda:0", hub="hf", ) @@ -22,6 +24,7 @@ res = model.generate( merge_vad=True, # merge_length_s=15, ) +# 把转出来的文字做了一个合理化的后处理, 比如去掉一些标点符号, 明显不合理的句子等等. text = rich_transcription_postprocess(res[0]["text"]) print(text) diff --git a/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/test/callFunction.py b/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/test/callFunction.py new file mode 100644 index 00000000..e69de29b diff --git a/XiaoZhi/文档/【黄海】今天的天气怎么样.wav b/XiaoZhi/文档/【黄海】今天的天气怎么样.wav new file mode 100644 index 00000000..13115f0a Binary files /dev/null and b/XiaoZhi/文档/【黄海】今天的天气怎么样.wav differ diff --git a/XiaoZhi/小智开发板固件烧录和设备激活.docx b/XiaoZhi/文档/小智开发板固件烧录和设备激活.docx similarity index 100% rename from XiaoZhi/小智开发板固件烧录和设备激活.docx rename to XiaoZhi/文档/小智开发板固件烧录和设备激活.docx diff --git a/XiaoZhi/文档/技术文档分析.txt b/XiaoZhi/文档/技术文档分析.txt new file mode 100644 index 00000000..588726a2 --- /dev/null +++ b/XiaoZhi/文档/技术文档分析.txt @@ -0,0 +1,13 @@ +1、ASR +D:\dsWork\QingLong\XiaoZhi\xiaozhi-esp32-server\main\xiaozhi-server\core\handle\receiveAudioHandle.py + if conn.client_voice_stop: + conn.client_abort = False + conn.asr_server_receive = False + # 音频太短了,无法识别 + if len(conn.asr_audio) < 15: + conn.asr_server_receive = True + else: + text, file_path = await conn.asr.speech_to_text( + conn.asr_audio, conn.session_id + ) +约第38行 diff --git a/XiaoZhi/配置文档.md b/XiaoZhi/文档/配置文档.md similarity index 100% rename from XiaoZhi/配置文档.md rename to XiaoZhi/文档/配置文档.md