# Transcribe a Chinese-language recording with Whisper, then print each
# segment with its start/end time followed by the full transcript.

import whisper

# Inputs for this run; edit these two constants to transcribe something else.
# (An earlier run used "audio.mp3" with a prompt describing a novel excerpt.)
AUDIO_PATH = "audio2.mp4"
INITIAL_PROMPT = "这是小学四年级习作问题诊断及指导的视频。"


def seconds_to_min_sec(total_seconds: int) -> str:
    """Format a whole number of seconds as ``"<minutes>分<seconds>秒"``.

    Args:
        total_seconds: Duration in whole seconds. Must be an ``int``; the
            ``02d`` format spec used for the seconds part rejects floats.

    Returns:
        The duration with seconds zero-padded to two digits. Minutes are
        not rolled over into hours, so 3700 yields ``"61分40秒"``.
    """
    minutes, seconds = divmod(total_seconds, 60)
    return f"{minutes}分{seconds:02d}秒"


# NOTE(review): the device is pinned to "cuda"; presumably this fails on a
# machine without a usable GPU -- confirm before running elsewhere.
model = whisper.load_model("turbo", device="cuda")

result = model.transcribe(
    AUDIO_PATH,
    word_timestamps=False,
    initial_prompt=INITIAL_PROMPT,
    verbose=False,
    language="zh",
)

# One line per segment: "start ~ end" followed by the segment text.
for item in result["segments"]:
    # Segment boundaries are truncated to whole seconds before formatting.
    start = seconds_to_min_sec(int(item["start"]))
    end = seconds_to_min_sec(int(item["end"]))
    print(f"时间:{start} ~ {end};文本:{item['text']}")

# Finally, the complete transcript in a single line.
print(f"所有文本内容:{result['text']}")