'commit'

3 months ago · 562be8eeeb
parent cfab493bec
commit 562be8eeeb
7 changed files with 18 additions and 2 deletions
--- a/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/core/providers/asr/fun_local.py
+++ b/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/core/providers/asr/fun_local.py
@@ -64,7 +64,7 @@ class ASRProvider(ASRProviderBase):
                pcm_data.append(pcm_frame)
            except opuslib_next.OpusError as e:
                logger.bind(tag=TAG).error(f"Opus解码错误: {e}", exc_info=True)
-
+        # 【黄海注释】：这里把解码得到的用户语音 PCM 数据保存成了 WAV 文件
        with wave.open(file_path, "wb") as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)  # 2 bytes = 16-bit
--- a/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/models/SenseVoiceSmall/demo.py
+++ b/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/models/SenseVoiceSmall/demo.py
@@ -3,11 +3,13 @@ from funasr.utils.postprocess_utils import rich_transcription_postprocess

 model_dir = "./"

-
+# ASR 部分使用的是阿里开源的 FunASR（即 fun-asr），GitHub 地址：
+# https://github.com/modelscope/FunASR
 model = AutoModel(
    model=model_dir,
    vad_model="fsmn-vad",
    vad_kwargs={"max_single_segment_time": 30000},
+    # 下面的 device 参数保持注释状态时，使用 CPU 进行识别；取消注释则使用 GPU（cuda:0）
    # device="cuda:0",
    hub="hf",
 )
@@ -22,6 +24,7 @@ res = model.generate(
    merge_vad=True,  #
    merge_length_s=15,
 )
+# 对识别出的原始文本做后处理：去除 SenseVoice 输出中的语种等特殊标签（如 <|zh|>），并把情感/事件标签转换成对应的表情符号。
 text = rich_transcription_postprocess(res[0]["text"])
 print(text)

--- a/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/test/callFunction.py
+++ b/XiaoZhi/xiaozhi-esp32-server/main/xiaozhi-server/test/callFunction.py
--- a/XiaoZhi/文档/【黄海】今天的天气怎么样.wav
+++ b/XiaoZhi/文档/【黄海】今天的天气怎么样.wav
--- a/XiaoZhi/文档/小智开发板固件烧录和设备激活.docx
+++ b/XiaoZhi/文档/小智开发板固件烧录和设备激活.docx
--- a/XiaoZhi/文档/技术文档分析.txt
+++ b/XiaoZhi/文档/技术文档分析.txt
@@ -0,0 +1,13 @@
+1、ASR
+D:\dsWork\QingLong\XiaoZhi\xiaozhi-esp32-server\main\xiaozhi-server\core\handle\receiveAudioHandle.py
+    if conn.client_voice_stop:
+        conn.client_abort = False
+        conn.asr_server_receive = False
+        # 音频太短了，无法识别
+        if len(conn.asr_audio) < 15:
+            conn.asr_server_receive = True
+        else:
+            text, file_path = await conn.asr.speech_to_text(
+                conn.asr_audio, conn.session_id
+            )
+（以上代码位于该文件约第 38 行）
--- a/XiaoZhi/文档/配置文档.md
+++ b/XiaoZhi/文档/配置文档.md