# File: ocr/test005.py (2025-08-14 16:04:59 +08:00; 83 lines, 2.2 KiB, Python)
#
# Note: the repository viewer flagged this file as containing ambiguous
# Unicode characters, i.e. characters that might be confused with other
# characters. If that is intentional, the warning can be ignored.

import math
import os

import whisper
from pydub import AudioSegment # pip install pydub
# --------- Parameters ----------
# Model name passed to whisper.load_model().
MODEL_NAME = "turbo"
# Device passed to whisper.load_model().
DEVICE = "cuda"
# Audio file opened with AudioSegment.from_file().
AUDIO_PATH = "audio.mp3"
SEGMENT_SEC = 10 # 10 seconds per segment; smaller values give finer granularity
# Seconds subtracted from SEGMENT_SEC to get the step between segment starts.
OVERLAP_SEC = 0
# Language code passed to model.transcribe().
LANGUAGE = "zh"
# Initial prompt passed to model.transcribe().
INITIAL_PROMPT = "这是一段小说。"
# -------------------------
# 1. Split the audio into fixed-length segments.
audio = AudioSegment.from_file(AUDIO_PATH)
total_ms = len(audio)  # total duration in milliseconds
# round() keeps both values integral (range() rejects floats) even when the
# settings are given as fractional seconds.
segment_ms = round(SEGMENT_SEC * 1000)
step_ms = round((SEGMENT_SEC - OVERLAP_SEC) * 1000)
if segment_ms <= 0 or step_ms <= 0:
    # A zero step makes range() raise an opaque error and a negative step
    # silently produces no segments, so reject bad settings up front.
    raise ValueError(
        "SEGMENT_SEC must be positive and greater than OVERLAP_SEC"
    )
segments = [
    audio[start_ms:start_ms + segment_ms]  # pydub slice, in milliseconds
    for start_ms in range(0, total_ms, step_ms)
]
total_chunks = len(segments)
print(f"音频共 {total_ms / 1000:.1f} 秒,切成 {total_chunks} 段处理")
# 2. Load the Whisper model named by MODEL_NAME onto DEVICE.
model = whisper.load_model(MODEL_NAME, device=DEVICE)
# 3. Transcribe segment by segment and print progress.
# Collects one transcription result per audio segment, in processing order.
all_segments = []
def percentage_of(total: int, part: int) -> int:
    """Return *part* as a whole-number percentage of *total*.

    Args:
        total: The overall number of items; must be greater than 0.
        part: The number of items accounted for so far.

    Returns:
        100 once ``part`` reaches or exceeds ``total``; otherwise the rounded
        percentage, capped at 99 so that 100 is only reported on completion.

    Raises:
        ValueError: If ``total`` is not greater than 0.
    """
    if total <= 0:
        raise ValueError("第一个参数(总数)必须大于 0")
    if part >= total:
        return 100
    # Rounding alone would turn e.g. 999/1000 into 100 before the work is done.
    return min(99, round(part / total * 100))
for idx, seg in enumerate(segments, 1):
    # Export the chunk to a temporary WAV file that is handed to
    # model.transcribe() by path.
    tmp_wav = f"_tmp_{idx}.wav"
    try:
        seg.export(tmp_wav, format="wav")
        result = model.transcribe(
            tmp_wav,
            language=LANGUAGE,
            initial_prompt=INITIAL_PROMPT,
            verbose=False,
            word_timestamps=False,
        )
    finally:
        # Remove the temporary file even when export or transcription fails,
        # so an aborted run does not leave _tmp_*.wav files behind.
        if os.path.exists(tmp_wav):
            os.remove(tmp_wav)
    all_segments.append(result)
    # Report progress as a whole-number percentage of processed chunks.
    print(f"{percentage_of(total_chunks, idx)}%")
print("\n转录完成!")
# 4. Merge the results and print them in timeline order.
def seconds_to_min_sec(total_seconds: int) -> str:
    """Format a duration given in whole seconds as ``M:SS``.

    Args:
        total_seconds: Duration in whole seconds.

    Returns:
        The minutes and the zero-padded two-digit seconds joined by a colon,
        e.g. ``65`` becomes ``"1:05"``. Minutes are not wrapped into hours.
    """
    minutes, seconds = divmod(total_seconds, 60)
    # Without a separator, 65 s would render as the ambiguous "105".
    return f"{minutes}:{seconds:02d}"
# Timestamps inside each chunk result are relative to that chunk, so shift
# them by the chunk's starting offset (in seconds) before printing.
current_start = 0
for transcription in all_segments:
    for piece in transcription["segments"]:
        begin_label = seconds_to_min_sec(int(current_start + piece["start"]))
        finish_label = seconds_to_min_sec(int(current_start + piece["end"]))
        spoken = piece["text"].strip()
        print(f"时间:{begin_label} ~ {finish_label};文本:{spoken}")
    # Advance once per chunk by the same stride used when slicing the audio.
    current_start += SEGMENT_SEC - OVERLAP_SEC