import logging

import jieba
import numpy as np
from gensim.models import KeyedVectors

from Config.Config import *

# Configure logging.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Load the Word2Vec vectors once at import time. The file is read in the text
# (non-binary) word2vec format, and `limit` caps how many vectors are read.
model = KeyedVectors.load_word2vec_format(WORD2VEC_MODEL_PATH, binary=False, limit=10000)
logger.info("模型加载成功,词向量维度: %s", model.vector_size)


def text_to_embedding(text: str) -> np.ndarray:
    """Convert a piece of text into a single embedding vector.

    The text is segmented with jieba, every token present in the loaded
    Word2Vec vocabulary is looked up, and the resulting word vectors are
    averaged element-wise.

    Args:
        text: The text to embed.

    Returns:
        A vector of length ``model.vector_size``. When none of the tokens are
        in the vocabulary (including empty text) a zero vector is returned.
        Both paths return a NumPy array so callers can treat the result
        uniformly (previously the fallback was a plain ``list``).
    """
    words = jieba.lcut(text)  # Segment the text with jieba.
    logger.info("文本: %s, 分词结果: %s", text, words)

    # Keep only tokens the model knows; unknown tokens are skipped.
    embeddings = [model[word] for word in words if word in model]
    logger.info("有效词向量数量: %s", len(embeddings))

    if not embeddings:
        logger.warning("未找到有效词,返回零向量")
        # float32 is the dtype gensim uses by default when loading word2vec
        # files, so the fallback is interchangeable with a real average.
        return np.zeros(model.vector_size, dtype=np.float32)

    avg_embedding = sum(embeddings) / len(embeddings)
    logger.info("生成的平均向量: %s...", avg_embedding[:5])  # Log the first 5 dims only.
    return avg_embedding