From c9bf55b7fe1ab0a70d60432d8e28313b5d364722 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Fri, 27 Jun 2025 14:49:25 +0800 Subject: [PATCH] 'commit' --- dsRag/ElasticSearch/T6_XiangLiangQuery.py | 9 +--- dsRag/Util/EsSearchUtil.py | 39 ++++++++++++++---- .../__pycache__/EsSearchUtil.cpython-310.pyc | Bin 3156 -> 3824 bytes 3 files changed, 33 insertions(+), 15 deletions(-) diff --git a/dsRag/ElasticSearch/T6_XiangLiangQuery.py b/dsRag/ElasticSearch/T6_XiangLiangQuery.py index 1628df80..25c3bd92 100644 --- a/dsRag/ElasticSearch/T6_XiangLiangQuery.py +++ b/dsRag/ElasticSearch/T6_XiangLiangQuery.py @@ -2,9 +2,7 @@ import logging import os from logging.handlers import RotatingFileHandler -from gensim.models import KeyedVectors - -from Config.Config import ES_CONFIG, MS_MODEL_PATH, MS_MODEL_LIMIT +from Config.Config import ES_CONFIG from ElasticSearch.Utils.ElasticsearchConnectionPool import ElasticsearchConnectionPool # 初始化日志 @@ -16,11 +14,6 @@ handler = RotatingFileHandler('Logs/start.log', maxBytes=1024 * 1024, backupCoun handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) logger.addHandler(handler) -# 1. 加载预训练的 Word2Vec 模型 -model = KeyedVectors.load_word2vec_format(MS_MODEL_PATH, binary=False, limit=MS_MODEL_LIMIT) -logger.info(f"模型加载成功,词向量维度: {model.vector_size}") - - def init_es_pool(): # 初始化Elasticsearch连接池 diff --git a/dsRag/Util/EsSearchUtil.py b/dsRag/Util/EsSearchUtil.py index 6ded7acc..21aa4b6e 100644 --- a/dsRag/Util/EsSearchUtil.py +++ b/dsRag/Util/EsSearchUtil.py @@ -1,11 +1,21 @@ +import logging +import os +from logging.handlers import RotatingFileHandler + import jieba from elasticsearch import Elasticsearch -from gensim.models import KeyedVectors - -from Config.Config import MS_MODEL_PATH, MS_MODEL_LIMIT +# 初始化日志 +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +# 确保日志目录存在 +os.makedirs('Logs', exist_ok=True) +handler = RotatingFileHandler('Logs/start.log', maxBytes=1024 * 1024, backupCount=5) +handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) +logger.addHandler(handler) class EsSearchUtil: + def __init__(self, es_config): """ 初始化Elasticsearch搜索工具 @@ -18,15 +28,30 @@ class EsSearchUtil: verify_certs=False ) - def text_to_embedding(self, text): + def __init__(self, es_config): + from gensim.models import KeyedVectors + from Config.Config import MS_MODEL_PATH, MS_MODEL_LIMIT + # 加载预训练模型 - model = KeyedVectors.load_word2vec_format(MS_MODEL_PATH, binary=False, limit=MS_MODEL_LIMIT) + self.model = KeyedVectors.load_word2vec_format(MS_MODEL_PATH, binary=False, limit=MS_MODEL_LIMIT) + logger.info(f"模型加载成功,词向量维度: {self.model.vector_size}") + + # 初始化Elasticsearch连接 + self.es = Elasticsearch( + hosts=es_config['hosts'], + basic_auth=es_config['basic_auth'], + verify_certs=False + ) + self.index_name = es_config['index_name'] + + def text_to_embedding(self, text): + # 使用已加载的模型 # 对文本分词并计算平均向量 words = jieba.lcut(text) - vectors = [model[word] for word in words if word in model] + vectors = [self.model[word] for word in words if word in self.model] if not vectors: - return [0.0] * model.vector_size + return [0.0] * self.model.vector_size # 计算平均向量 avg_vector = [sum(dim)/len(vectors) for dim in zip(*vectors)] diff --git a/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc b/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc index f57645b76a53df43ffe44389e2a7afbeccdc4c35..b32117713734e67046d521e9d0bddaa3ab66bb1b 100644 GIT binary patch delta 2037 zcmZt{O>f*(@V#fR*Xvzxl1&PYC?P5;bZJQ`s8tJTC0svugp7?s7@_pAA01`9L+q8UGdgGb#e9ye`FMI!d zv=h6oL*V&+;@!D@eT4jq#Dh-`h|_S?bpV`j8WJyiQ;!18LKYdGK{KA|nZPr`Tx59` zC4y|`g?*EH1yR^^1Zfs;QsB~NiJLsft@REA(%^Y+^8$C)4bK(sW*OA_IEZTD!e+m4 z5qpjB5_c~X?p|Wv0H^0kwR{(XBGXlx4xWn>KM7imsUQ?*{1y*|1h)HX=&K~CE8)xf zTxy<;8|p3K+hQqDNiBZw;z}8LM^xg=WGsvu0U`b5E@oY2sU7)CFE1xTrNtS)esN)b zGG1sUE7b!BeN|6_NL1D6)1wCthGJ2KEq|Aegi?M3zScJehR>Y^V}f9{JjN+!mx*U^ zqxXh70B!OCEW%~#MJOaR)$1bSQ0vPGaB;F4O2 z6NtMwEh%AyY+OCsnSv0lnV{v%&=9c!(gSFll^EB%dH*CJ&2(LIj{>Vhz$aGjDUZIft6@Ktr$a;85XsmsrFyx zTpdcZ`2t z%&JsA0u<`@-fvhvoU+%bAu zcmI}==JW*0qv+Sf`2fd<&>HMil3LmR(c^J%R8UkN>waxLKB5byf%fg*8#BGM#=AS# zhwtE!!BF}MY%ym+30VtT^N=l;vubEB@;F+=2?T~tCv$<^e|aeC?6RPyw|>fBppz$1 z>;+Wrjq-ivar|`Zf6TaZ%QG_Iwan1lTlGSVzUf(6__B(|47qDqP9P)eM^>m}rdUgs z=Y>4>Uk5rP-rZk3CI$(Q>ieyvv0Jl=R{cgkXw_VQ*Yp3<)Yw$RB66vH!Xq4l-@iEY@5P;)bTm*{myiRyPZ7VYpR|Y idmPr)LMSHiKC8U|AU#^5HY-ubDA6)2!=7h{zH`D>KF^Y>PKT9D)NsA&Sl63Xdg04kuq;VL1&NrTf*p{E z3oa~pT4G8#!abmo4Vfo8kU5a~q6?WTrbQqoAoIi|oYX)1oif@9xpIM0dChY+H_IT^ zuB_r=Srj4DRwc7Fh2=gYDroxr2JJLo^H29Jn6hDT7}YlrYO+CVP*iilzUNO!ZT(`8 z=~KGL9+Nd{6nbNJi#(!F=pn6Ug*~E&WW$*uE95Z+gab8rwR=c^Ac)%IV~&2@`2u7( zgS1H0j<&NjPwOEi&2RR(!9_TBM8;&*-=kw7+a`(~k}*9`eq{IA3J3;(2|&T-Kh)NP zQVFou&PyS)c6kCY+F#1jsveg4=FJQ6{x8~ak?bxs_ubhha=zV1CLrO}R<3y_!5Y=a zja;NT5W3rPSeL3grBr8Nc@Bm;3nBCr8l`8lG7&MVSb!bzTB-7+R_LWdGqt^u%0UuY z*>J1Y-Ig@2(%nRAzMF1_mg*tQs^!C4+sIQzezHA^Tj+7EY6c0h5M`>HMtEG8vCIcj zh_o1?xC+;%n9usuV!fjccah%Kep(3mFfNi@KI7UpZ|VvFp%cFMDTw#rkyW0bDi&K*gWl=TfPcp(1pmU-p7FL1`}B7F(GEJ z@pVm6t%Sv+|2nI5`2&P4;O1A<;Os{jKF07GO|ZT*P^pMpqNSssy9K?gont@WK{BpR zh}9xaw71aO(rq`!_0FbL*WNt9RPE5>sR7t0%NJoGLXL zUYBwpcreW6dBme$&!6!7zB@5#*20d$uR;g%p4iz?0MP4idKkarQdTo!BT kXuf@%wxVHs$*dCz+FJsGtxVper~~iHBR$%qQ>@Ql0AvLrv;Y7A