20 lines
783 B
Python
20 lines
783 B
Python
|
import whisper
|
|||
|
|
|||
|
# Load the Whisper "turbo" checkpoint onto the CUDA device.
model = whisper.load_model("turbo", device="cuda")

# Transcribe the recording as Chinese ("zh") without word-level timestamps.
# The initial prompt is Chinese context text, roughly: "This is a video on
# diagnosing and guiding fourth-grade composition-writing problems."
result = model.transcribe(
    "audio2.mp4",
    word_timestamps=False,
    initial_prompt="这是小学四年级习作问题诊断及指导的视频。",
    verbose=False,
    language="zh",
)
|
|||
|
|
|||
|
# result = model.transcribe("audio.mp3", word_timestamps=False, initial_prompt="这是一段小说。", verbose=False, language='zh')
|
|||
|
|
|||
|
|
|||
|
def seconds_to_min_sec(total_seconds: float) -> str:
    """Format a duration in seconds as ``"<minutes>分<seconds>秒"``.

    Fractional input is truncated to whole seconds before formatting, so
    float timestamps can be passed directly without a prior ``int()`` call.
    Minutes are not wrapped into hours: 3600 seconds yields ``"60分00秒"``.
    Negative input is not handled specially.

    Args:
        total_seconds: Duration in seconds, as an int or a float.

    Returns:
        The formatted label, with seconds zero-padded to two digits,
        e.g. ``"1分05秒"`` for 65.
    """
    # The ``02d`` format spec raises ValueError for floats, so coerce first.
    minutes, seconds = divmod(int(total_seconds), 60)
    return f"{minutes}分{seconds:02d}秒"
|
|||
|
|
|||
|
|
|||
|
# Print one line per transcribed segment: its start/end time and its text.
for segment in result['segments']:
    # Timestamps are truncated to whole seconds before formatting.
    start_label = seconds_to_min_sec(int(segment['start']))
    end_label = seconds_to_min_sec(int(segment['end']))
    print(f"时间:{start_label} ~ {end_label};文本:{segment['text']}")

# Finally, print the complete transcript text in one piece.
print('所有文本内容:' + result['text'])
|