"""Transcribe the bundled English example clip with FunASR and print the text."""

from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess

# Directory the model is loaded from (here: the current working directory).
model_dir = "./"

# The ASR part uses Alibaba's open-source FunASR (a.k.a. fun-asr); GitHub:
# https://github.com/modelscope/FunASR
model = AutoModel(
    model=model_dir,
    vad_model="fsmn-vad",
    vad_kwargs={"max_single_segment_time": 30000},
    # With the next line commented out, recognition runs on the CPU instead
    # of the GPU.
    # device="cuda:0",
    hub="hf",
)

# English example clip shipped under the model path.
res = model.generate(
    input=f"{model.model_path}/example/en.mp3",
    cache={},
    # Other values: "zh", "en", "yue", "ja", "ko", "nospeech".
    # NOTE(review): the original comment listed "zn"; presumably a typo for
    # "zh" -- confirm against the model's documented language codes.
    language="auto",
    use_itn=True,
    batch_size_s=60,
    merge_vad=True,
    # NOTE(review): the collapsed source is ambiguous about whether this
    # argument was active or commented out -- confirm the intended setting.
    merge_length_s=15,
)

# Post-process the transcribed text to make it more reasonable, e.g. remove
# some punctuation marks, obviously unreasonable sentences, and so on.
text = rich_transcription_postprocess(res[0]["text"])
print(text)