diff --git a/dsRag/ElasticSearch/T6_XiangLiangQuery.py b/dsRag/ElasticSearch/T6_XiangLiangQuery.py index 1628df80..25c3bd92 100644 --- a/dsRag/ElasticSearch/T6_XiangLiangQuery.py +++ b/dsRag/ElasticSearch/T6_XiangLiangQuery.py @@ -2,9 +2,7 @@ import logging import os from logging.handlers import RotatingFileHandler -from gensim.models import KeyedVectors - -from Config.Config import ES_CONFIG, MS_MODEL_PATH, MS_MODEL_LIMIT +from Config.Config import ES_CONFIG from ElasticSearch.Utils.ElasticsearchConnectionPool import ElasticsearchConnectionPool # 初始化日志 @@ -16,11 +14,6 @@ handler = RotatingFileHandler('Logs/start.log', maxBytes=1024 * 1024, backupCoun handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) logger.addHandler(handler) -# 1. 加载预训练的 Word2Vec 模型 -model = KeyedVectors.load_word2vec_format(MS_MODEL_PATH, binary=False, limit=MS_MODEL_LIMIT) -logger.info(f"模型加载成功,词向量维度: {model.vector_size}") - - def init_es_pool(): # 初始化Elasticsearch连接池 diff --git a/dsRag/Util/EsSearchUtil.py b/dsRag/Util/EsSearchUtil.py index 6ded7acc..21aa4b6e 100644 --- a/dsRag/Util/EsSearchUtil.py +++ b/dsRag/Util/EsSearchUtil.py @@ -1,11 +1,21 @@ +import logging +import os +from logging.handlers import RotatingFileHandler + import jieba from elasticsearch import Elasticsearch -from gensim.models import KeyedVectors - -from Config.Config import MS_MODEL_PATH, MS_MODEL_LIMIT +# 初始化日志 +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +# 确保日志目录存在 +os.makedirs('Logs', exist_ok=True) +handler = RotatingFileHandler('Logs/start.log', maxBytes=1024 * 1024, backupCount=5) +handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) +logger.addHandler(handler) class EsSearchUtil: + def __init__(self, es_config): """ 初始化Elasticsearch搜索工具 @@ -18,15 +28,30 @@ class EsSearchUtil: verify_certs=False ) - def text_to_embedding(self, text): + def __init__(self, es_config): + from gensim.models import KeyedVectors + from Config.Config import MS_MODEL_PATH, MS_MODEL_LIMIT + # 加载预训练模型 - model = KeyedVectors.load_word2vec_format(MS_MODEL_PATH, binary=False, limit=MS_MODEL_LIMIT) + self.model = KeyedVectors.load_word2vec_format(MS_MODEL_PATH, binary=False, limit=MS_MODEL_LIMIT) + logger.info(f"模型加载成功,词向量维度: {self.model.vector_size}") + + # 初始化Elasticsearch连接 + self.es = Elasticsearch( + hosts=es_config['hosts'], + basic_auth=es_config['basic_auth'], + verify_certs=False + ) + self.index_name = es_config['index_name'] + + def text_to_embedding(self, text): + # 使用已加载的模型 # 对文本分词并计算平均向量 words = jieba.lcut(text) - vectors = [model[word] for word in words if word in model] + vectors = [self.model[word] for word in words if word in self.model] if not vectors: - return [0.0] * model.vector_size + return [0.0] * self.model.vector_size # 计算平均向量 avg_vector = [sum(dim)/len(vectors) for dim in zip(*vectors)] diff --git a/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc b/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc index f57645b7..b3211771 100644 Binary files a/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc and b/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc differ