import io
from pathlib import Path

import numpy as np
import soundfile as sf
from resemblyzer import VoiceEncoder, preprocess_wav

# Voice-embedding model shared by every function in this module.
encoder = VoiceEncoder()

# Maps speaker name -> reference voice embedding (filled by load_known_speakers).
known_speakers = {}


def _cosine_similarity(a, b):
    """Return the cosine similarity of two vectors, or 0.0 if either is all zeros."""
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return float(np.dot(a, b) / denom)


def _find_best_match(embedding):
    """
    Compare an embedding against every entry in ``known_speakers``.

    :param embedding: voice embedding of the utterance to identify
    :return: (best speaker name, best similarity); (None, -1) when no
             speakers have been loaded
    """
    best_similarity = -1
    best_speaker = None
    for speaker, known_embedding in known_speakers.items():
        # The stored reference is an average of several embeddings and is
        # therefore not unit length; a plain dot product would understate
        # the similarity, so normalise explicitly.
        similarity = _cosine_similarity(embedding, known_embedding)
        if similarity > best_similarity:
            best_similarity = similarity
            best_speaker = speaker
    return best_speaker, best_similarity


def load_known_speakers(speaker_folder):
    """
    Load the reference voice embedding of every known speaker.

    Each sub-directory of ``speaker_folder`` is treated as one speaker: the
    directory name is the speaker name and every ``*.wav`` file inside it is
    embedded. The per-file embeddings are averaged and stored in the
    module-level ``known_speakers`` dict. Directories without any ``*.wav``
    file are skipped.

    :param speaker_folder: path of the folder holding one sub-directory per speaker
    """
    speaker_folder = Path(speaker_folder)
    for speaker_dir in speaker_folder.iterdir():
        if not speaker_dir.is_dir():
            continue
        speaker_name = speaker_dir.name
        embeddings = [
            encoder.embed_utterance(preprocess_wav(wav_file))
            for wav_file in speaker_dir.glob("*.wav")
        ]
        if embeddings:
            known_speakers[speaker_name] = np.mean(embeddings, axis=0)
            print(f"已加载说话人: {speaker_name}")


def recognize_speaker_from_file(audio_file_path, threshold=0.7):
    """
    Identify the speaker of an audio file.

    :param audio_file_path: path of the audio file to identify
    :param threshold: similarity threshold; a best match below this value is
                      reported as an unknown speaker
    :return: (speaker name, similarity); (None, 0) if any step raised an
             exception
    """
    try:
        # Load and preprocess the audio, then compute its voice embedding.
        wav = preprocess_wav(audio_file_path)
        embedding = encoder.embed_utterance(wav)

        best_speaker, best_similarity = _find_best_match(embedding)

        # Below the threshold the best match is not trusted.
        if best_similarity < threshold:
            return "未知说话人", best_similarity
        return best_speaker, best_similarity
    except Exception as e:
        # Best-effort API: report the problem and signal failure to the caller.
        print(f"识别说话人时出错: {str(e)}")
        return None, 0


def recognize_speaker(audio):
    """
    Identify the speaker of an in-memory audio object.

    No similarity threshold is applied: the closest known speaker is always
    returned.

    :param audio: object exposing ``get_wav_data()``; assumed to return the
                  bytes of a complete WAV file (as ``speech_recognition``'s
                  ``AudioData`` does) -- confirm against the caller
    :return: name of the closest known speaker, or None if none are loaded
    """
    # preprocess_wav accepts a file path or a waveform array, not raw WAV
    # bytes, so decode the bytes into samples first.
    samples, sample_rate = sf.read(io.BytesIO(audio.get_wav_data()), dtype="float32")
    if samples.ndim > 1:
        # Down-mix multi-channel audio to mono.
        samples = samples.mean(axis=1)
    # Passing the source rate lets preprocess_wav resample if needed.
    wav = preprocess_wav(samples, source_sr=sample_rate)
    embedding = encoder.embed_utterance(wav)
    best_speaker, _ = _find_best_match(embedding)
    return best_speaker


if __name__ == "__main__":
    # Load the reference embeddings of the known speakers.
    speaker_folder = "known_speakers"  # folder with one sub-directory per speaker
    load_known_speakers(speaker_folder)

    # Audio file to identify; replace with your own path.
    test_audio_path = "huanghai_test.wav"

    # Identify the speaker and report the result.
    speaker, similarity = recognize_speaker_from_file(test_audio_path)
    if speaker:
        print(f"识别结果: {speaker}")
        print(f"相似度: {similarity:.4f}")
    else:
        print("无法识别说话人")