# File listing header: 83 lines, 2.2 KiB, Python.
import math
import os

import whisper
from pydub import AudioSegment  # pip install pydub

# --------- Configuration ----------
MODEL_NAME = "turbo"  # model name handed to whisper.load_model
DEVICE = "cuda"  # device handed to whisper.load_model
AUDIO_PATH = "audio.mp3"  # audio file that gets split and transcribed
SEGMENT_SEC = 10  # chunk length in seconds; smaller means finer granularity
# Seconds shared by consecutive chunks. Must stay smaller than SEGMENT_SEC,
# because the chunk step is computed as SEGMENT_SEC - OVERLAP_SEC.
OVERLAP_SEC = 0
LANGUAGE = "zh"  # language handed to model.transcribe
INITIAL_PROMPT = "这是一段小说。"  # initial prompt handed to model.transcribe
# ----------------------------------

# 1. Split the audio into fixed-length chunks
audio = AudioSegment.from_file(AUDIO_PATH)
total_ms = len(audio)  # total duration in milliseconds
segment_ms = SEGMENT_SEC * 1000
step_ms = (SEGMENT_SEC - OVERLAP_SEC) * 1000
if step_ms <= 0:
    # range() rejects a zero step and yields nothing for a negative one, so
    # fail early with a message that names the misconfigured settings.
    raise ValueError("OVERLAP_SEC must be smaller than SEGMENT_SEC")

# Chunk starts are step_ms apart and each chunk spans at most segment_ms;
# the slice indices are millisecond positions in the audio.
segments = [
    audio[start_ms:start_ms + segment_ms]
    for start_ms in range(0, total_ms, step_ms)
]

total_chunks = len(segments)
print(f"音频共 {total_ms / 1000:.1f} 秒,切成 {total_chunks} 段处理")

# 2. Load the model
model = whisper.load_model(MODEL_NAME, device=DEVICE)

# 3. Transcribe chunk by chunk and print the progress
all_segments = []  # collects one model.transcribe result per chunk, in order


def percentage_of(total: int, part: int) -> int:
    """Return ``part`` as a whole-number percentage of ``total``.

    Args:
        total: The full amount; must be greater than 0.
        part: The portion of ``total`` that has been reached.

    Returns:
        ``part / total * 100`` rounded with the built-in ``round``, or 100
        whenever ``part`` is greater than or equal to ``total``.

    Raises:
        ValueError: If ``total`` is not greater than 0.
    """
    if total <= 0:
        raise ValueError("第一个参数(总数)必须大于 0")
    if part >= total:
        # Cap at 100 so an overshooting ``part`` never reports more than 100%.
        return 100
    # round() with a single argument already returns an int.
    return round(part / total * 100)


# Transcribe every chunk in order and print the overall progress after each.
for idx, seg in enumerate(segments, 1):
    # The chunk is written to a temporary WAV file whose path is then given
    # to model.transcribe.
    tmp_wav = f"_tmp_{idx}.wav"
    try:
        seg.export(tmp_wav, format="wav")
        result = model.transcribe(
            tmp_wav,
            language=LANGUAGE,
            initial_prompt=INITIAL_PROMPT,
            verbose=False,
            word_timestamps=False,
        )
    finally:
        # Delete the temporary file even when export or transcription fails,
        # so an aborted run does not leave _tmp_*.wav files behind.
        if os.path.exists(tmp_wav):
            os.remove(tmp_wav)

    all_segments.append(result)

    # Progress is the share of chunks finished so far, as a whole percent.
    print(f"{percentage_of(total_chunks, idx)}%")

print("\n转录完成!")


# 4. Merge the results and print them in timeline order
def seconds_to_min_sec(total_seconds: float) -> str:
    """Format a duration in seconds as ``"<minutes>分<seconds>秒"``.

    Args:
        total_seconds: Duration in seconds. A fractional value is truncated
            to whole seconds before formatting.

    Returns:
        The duration with minutes unpadded and seconds zero-padded to two
        digits, e.g. ``"2分05秒"`` for 125.
    """
    # The ``02d`` format below only accepts integers, so truncate first.
    minutes, seconds = divmod(int(total_seconds), 60)
    return f"{minutes}分{seconds:02d}秒"


# Each chunk was transcribed as its own file, so the start/end values in a
# chunk's result are shifted by that chunk's position in the full audio.
# Consecutive chunks start (SEGMENT_SEC - OVERLAP_SEC) seconds apart.
chunk_step_sec = SEGMENT_SEC - OVERLAP_SEC
for chunk_index, chunk_result in enumerate(all_segments):
    current_start = chunk_index * chunk_step_sec
    for seg in chunk_result["segments"]:
        start = current_start + seg["start"]
        end = current_start + seg["end"]
        print(
            f"时间:{seconds_to_min_sec(int(start))} ~ "
            f"{seconds_to_min_sec(int(end))};文本:{seg['text'].strip()}"
        )