# Listing metadata: 83 lines, 2.2 KiB, Python
import math
import os

import whisper
from pydub import AudioSegment  # pip install pydub

# --------- Settings ----------
MODEL_NAME = "turbo"  # model name handed to whisper.load_model
DEVICE = "cuda"  # device handed to whisper.load_model
AUDIO_PATH = "audio.mp3"  # input audio file opened with pydub
SEGMENT_SEC = 10  # length of each chunk in seconds; smaller means finer granularity
OVERLAP_SEC = 0  # seconds shared by consecutive chunks; must stay below SEGMENT_SEC
LANGUAGE = "zh"  # language handed to model.transcribe
INITIAL_PROMPT = "这是一段小说。"  # initial prompt handed to model.transcribe
# -----------------------------

# 1. Cut the audio into short chunks
# Work in whole milliseconds: pydub slices by millisecond and range() needs ints.
segment_ms = round(SEGMENT_SEC * 1000)
step_ms = round((SEGMENT_SEC - OVERLAP_SEC) * 1000)
if OVERLAP_SEC < 0 or step_ms <= 0:
    # A zero step makes range() raise, a negative step yields no chunks at all,
    # and a negative overlap would silently skip audio between chunks.
    raise ValueError("OVERLAP_SEC 必须满足 0 <= OVERLAP_SEC < SEGMENT_SEC")

audio = AudioSegment.from_file(AUDIO_PATH)
total_ms = len(audio)  # total duration in milliseconds

# Chunk k starts at k * step_ms and is at most segment_ms long; the last chunk
# may be shorter because slicing stops at the end of the audio.
segments = [
    audio[start_ms:start_ms + segment_ms]
    for start_ms in range(0, total_ms, step_ms)
]

total_chunks = len(segments)
print(f"音频共 {total_ms / 1000:.1f} 秒,切成 {total_chunks} 段处理")

# 2. Load the model
model = whisper.load_model(MODEL_NAME, device=DEVICE)

# 3. Transcribe chunk by chunk and print progress
all_segments = []  # one transcription result per chunk, in chunk order


def percentage_of(total: int, part: int) -> int:
    """Return ``part`` as a whole-number percentage of ``total``, capped at 100.

    Args:
        total: The whole amount; must be greater than 0.
        part: The portion of ``total`` to express as a percentage.

    Returns:
        ``part / total * 100`` rounded to the nearest integer, or 100 when
        ``part`` is greater than or equal to ``total``.

    Raises:
        ValueError: If ``total`` is not greater than 0.
    """
    if total <= 0:
        raise ValueError("第一个参数(总数)必须大于 0")
    if part >= total:
        return 100
    # round() called with one argument already returns an int.
    return round(part / total * 100)


for idx, seg in enumerate(segments, 1):
    # Whisper reads from a path, so each chunk is written to a temporary wav.
    tmp_wav = f"_tmp_{idx}.wav"
    try:
        # export() returns the open output file handle; close it right away so
        # the file can be read and deleted on every platform.
        seg.export(tmp_wav, format="wav").close()
        result = model.transcribe(
            tmp_wav,
            language=LANGUAGE,
            initial_prompt=INITIAL_PROMPT,
            verbose=False,
            word_timestamps=False,
        )
    finally:
        # Remove the temporary file even if export or transcription failed.
        if os.path.exists(tmp_wav):
            os.remove(tmp_wav)
    all_segments.append(result)

    # Print progress as a whole-number percentage of the chunks processed.
    print(str(percentage_of(total_chunks, idx)) + "%")

print("\n转录完成!")


# 4. Merge the results and print them along the timeline
def seconds_to_min_sec(total_seconds: int) -> str:
    """Format a whole number of seconds as ``"<minutes>分<seconds>秒"``.

    Minutes are not rolled over into hours, so 3600 becomes ``"60分00秒"``.

    Args:
        total_seconds: Duration in whole seconds.

    Returns:
        The minute count followed by the seconds zero-padded to two digits,
        e.g. 75 -> ``"1分15秒"``.
    """
    minutes, seconds = divmod(total_seconds, 60)
    return f"{minutes}分{seconds:02d}秒"


# Each chunk's timestamps are relative to that chunk, so shift them by the
# chunk's start offset within the full audio before printing.
current_start = 0
for chunk_result in all_segments:
    for seg in chunk_result["segments"]:
        start = current_start + seg["start"]
        end = current_start + seg["end"]
        start_label = seconds_to_min_sec(int(start))
        end_label = seconds_to_min_sec(int(end))
        text = seg["text"].strip()
        print(f"时间:{start_label} ~ {end_label};文本:{text}")
    # Consecutive chunks start SEGMENT_SEC - OVERLAP_SEC seconds apart.
    current_start += SEGMENT_SEC - OVERLAP_SEC