init

2025-08-14 16:04:59 +08:00
commit d8bceb3fe1
73 changed files with 3644 additions and 0 deletions
--- a/test005.py
+++ b/test005.py
@@ -0,0 +1,82 @@
+import whisper
+import math
+from pydub import AudioSegment  # pip install pydub
+
+# --------- Parameters ----------
+# Input file and where/which model runs
+AUDIO_PATH = "audio.mp3"
+DEVICE = "cuda"
+MODEL_NAME = "turbo"
+
+# Chunking: every chunk is SEGMENT_SEC seconds long (smaller = finer granularity);
+# OVERLAP_SEC is subtracted from it to get the distance between chunk starts.
+SEGMENT_SEC = 10
+OVERLAP_SEC = 0
+
+# Options passed to the transcription call
+LANGUAGE = "zh"
+INITIAL_PROMPT = "这是一段小说。"
+# -------------------------------
+
+# 1. Split the audio into short chunks
+# range() below needs integer milliseconds, so convert once in case the
+# settings are given as fractional seconds.
+segment_ms = int(round(SEGMENT_SEC * 1000))
+step_ms = int(round((SEGMENT_SEC - OVERLAP_SEC) * 1000))
+
+# The window has to move forward on every step: a zero step makes range()
+# raise, and a negative step silently produces no chunks at all.
+if OVERLAP_SEC < 0 or step_ms <= 0:
+    raise ValueError("OVERLAP_SEC 必须满足 0 <= OVERLAP_SEC < SEGMENT_SEC")
+
+audio = AudioSegment.from_file(AUDIO_PATH)
+total_ms = len(audio)  # total duration in milliseconds
+
+segments = [
+    audio[start_ms:start_ms + segment_ms]  # pydub slice, indexed in milliseconds
+    for start_ms in range(0, total_ms, step_ms)
+]
+
+total_chunks = len(segments)
+print(f"音频共 {total_ms / 1000:.1f} 秒，切成 {total_chunks} 段处理")
+
+# 2. Load the model
+# MODEL_NAME selects which Whisper model is loaded; DEVICE is passed as its device.
+model = whisper.load_model(MODEL_NAME, device=DEVICE)
+
+# 3. Transcribe chunk by chunk and print progress
+# Collects one model.transcribe() result per audio chunk, in chunk order.
+all_segments = []
+
+
+def percentage_of(total: int, part: int) -> int:
+    """Return how far ``part`` is through ``total`` as a whole percentage.
+
+    The result is rounded down and clamped to ``0..100``, so 100 is returned
+    only once ``part`` has reached ``total``; a progress display built on it
+    never reports completion while work is still outstanding.
+
+    Args:
+        total: Number of units of work; must be positive.
+        part: Number of units finished so far.
+
+    Returns:
+        An integer percentage between 0 and 100 inclusive.
+
+    Raises:
+        ValueError: If ``total`` is zero or negative.
+    """
+    if total <= 0:
+        raise ValueError("第一个参数（总数）必须大于 0")
+    if part >= total:
+        return 100
+    # Floor instead of round(): rounding would turn e.g. 199/200 into 100.
+    # max() keeps a negative ``part`` from producing a negative percentage.
+    return max(0, int(part * 100 // total))
+
+
+import os
+
+for idx, seg in enumerate(segments, 1):
+    # The transcriber is given a file path, so write the chunk to a temporary WAV first
+    tmp_wav = f"_tmp_{idx}.wav"
+    try:
+        seg.export(tmp_wav, format="wav")
+        result = model.transcribe(
+            tmp_wav,
+            language=LANGUAGE,
+            initial_prompt=INITIAL_PROMPT,
+            verbose=False,
+            word_timestamps=False,
+        )
+    finally:
+        # Remove the temporary file even when export or transcription fails;
+        # the existence check keeps a failed export from masking its own error.
+        if os.path.exists(tmp_wav):
+            os.remove(tmp_wav)
+    all_segments.append(result)
+
+    # Report progress once the chunk is finished
+    print(f"{percentage_of(total_chunks, idx)}%")
+
+print("\n转录完成！")
+
+
+# 4. 合并结果，按时间轴输出
+def seconds_to_min_sec(total_seconds: int) -> str:
+    """Format a duration in seconds as ``"<minutes>分<seconds>秒"``.
+
+    Minutes are not wrapped into hours, and seconds are zero-padded to two
+    digits. A fractional input is truncated toward zero, and a negative
+    duration is rendered with a leading ``-`` in front of its magnitude.
+
+    Args:
+        total_seconds: Duration in seconds.
+
+    Returns:
+        The formatted duration, e.g. ``"1分15秒"`` for 75.
+    """
+    whole_seconds = int(total_seconds)
+    # Split the magnitude only: divmod() on a negative number floors, which
+    # would turn -5 into "-1分55秒".
+    sign = "-" if whole_seconds < 0 else ""
+    minutes, seconds = divmod(abs(whole_seconds), 60)
+    return f"{sign}{minutes}分{seconds:02d}秒"
+
+
+# Distance in seconds between the starts of two consecutive chunks
+chunk_step = SEGMENT_SEC - OVERLAP_SEC
+for chunk_index, chunk_result in enumerate(all_segments):
+    # Each segment's start/end is added to its chunk's offset to place it on
+    # the timeline of the whole recording.
+    offset = chunk_index * chunk_step
+    for piece in chunk_result["segments"]:
+        begin_label = seconds_to_min_sec(int(offset + piece["start"]))
+        finish_label = seconds_to_min_sec(int(offset + piece["end"]))
+        print(f"时间：{begin_label} ~ {finish_label}；文本：{piece['text'].strip()}")