main
HuangHai 3 months ago
parent cfab493bec
commit 562be8eeeb

@ -64,7 +64,7 @@ class ASRProvider(ASRProviderBase):
pcm_data.append(pcm_frame)
except opuslib_next.OpusError as e:
logger.bind(tag=TAG).error(f"Opus解码错误: {e}", exc_info=True)
# 【黄海注释】这里把用户输入的语音WAV保存成了文件
with wave.open(file_path, "wb") as wf:
wf.setnchannels(1)
wf.setsampwidth(2) # 2 bytes = 16-bit

@ -3,11 +3,13 @@ from funasr.utils.postprocess_utils import rich_transcription_postprocess
# Model location handed to AutoModel below: the current directory.
model_dir = "./"
# The ASR part uses Alibaba's open-source FunASR (fun-asr); GitHub:
# https://github.com/modelscope/FunASR
model = AutoModel(
model=model_dir,
vad_model="fsmn-vad",
vad_kwargs={"max_single_segment_time": 30000},
# Original author's note: with the next line commented out, recognition
# runs on the CPU instead of the GPU.
# device="cuda:0",
hub="hf",
)
@ -22,6 +24,7 @@ res = model.generate(
merge_vad=True, #
merge_length_s=15,
)
# Post-process the transcribed text of the first result before printing it.
# NOTE(review): the original note says this cleans the text up, e.g. removing
# some punctuation and obviously unreasonable sentences — confirm against
# FunASR's rich_transcription_postprocess documentation.
text = rich_transcription_postprocess(res[0]["text"])
print(text)

@ -0,0 +1,13 @@
1、ASR
D:\dsWork\QingLong\XiaoZhi\xiaozhi-esp32-server\main\xiaozhi-server\core\handle\receiveAudioHandle.py
if conn.client_voice_stop:
conn.client_abort = False
conn.asr_server_receive = False
# 音频太短了,无法识别
if len(conn.asr_audio) < 15:
conn.asr_server_receive = True
else:
text, file_path = await conn.asr.speech_to_text(
conn.asr_audio, conn.session_id
)
(上述代码位于该文件约第38行)
Loading…
Cancel
Save