From 305bd177bbe288bb4731a74b5ce649e2b48de00c Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Tue, 19 Aug 2025 09:21:32 +0800 Subject: [PATCH] 'commit' --- .../ElasticSearch/T1_RebuildMapping.py | 3 +- .../ElasticSearch/Utils/EsSearchUtil.py | 40 +++++++++--------- .../__pycache__/EsSearchUtil.cpython-310.pyc | Bin 6454 -> 6605 bytes 3 files changed, 22 insertions(+), 21 deletions(-) diff --git a/dsSchoolBuddy/ElasticSearch/T1_RebuildMapping.py b/dsSchoolBuddy/ElasticSearch/T1_RebuildMapping.py index 075c8263..a587e62c 100644 --- a/dsSchoolBuddy/ElasticSearch/T1_RebuildMapping.py +++ b/dsSchoolBuddy/ElasticSearch/T1_RebuildMapping.py @@ -1,8 +1,7 @@ from Config import Config from ElasticSearch.Utils.EsSearchUtil import EsSearchUtil, disableWarning -# 禁用警告 -disableWarning() + # 创建EsSearchUtil实例 search_util = EsSearchUtil(Config.ES_CONFIG) diff --git a/dsSchoolBuddy/ElasticSearch/Utils/EsSearchUtil.py b/dsSchoolBuddy/ElasticSearch/Utils/EsSearchUtil.py index 9be86ef5..c93039b6 100644 --- a/dsSchoolBuddy/ElasticSearch/Utils/EsSearchUtil.py +++ b/dsSchoolBuddy/ElasticSearch/Utils/EsSearchUtil.py @@ -18,9 +18,6 @@ def disableWarning(): warnings.filterwarnings('ignore', message='Connecting to .* using TLS with verify_certs=False is insecure') warnings.filterwarnings('ignore', message='Unverified HTTPS request is being made to host') -# 初始化配置 -disableWarning() - # 初始化日志 logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -31,6 +28,8 @@ class EsSearchUtil: 初始化Elasticsearch搜索工具 :param es_config: Elasticsearch配置字典,包含hosts, username, password, index_name等 """ + # 禁用警告 + disableWarning() self.es_config = es_config # 初始化连接池 @@ -154,16 +153,15 @@ def insert_long_text_to_es(long_text: str, tags: list = None) -> bool: bool: 插入是否成功 """ try: - # 1. 初始化Elasticsearch连接 - es = Elasticsearch( - hosts=Config.ES_CONFIG['hosts'], - basic_auth=Config.ES_CONFIG['basic_auth'], - verify_certs=False - ) + # 1. 创建EsSearchUtil实例以使用连接池 + search_util = EsSearchUtil(Config.ES_CONFIG) + + # 2. 从连接池获取连接 + conn = search_util.es_pool.get_connection() - # 2. 检查索引是否存在,不存在则创建 + # 3. 检查索引是否存在,不存在则创建 index_name = Config.ES_CONFIG['index_name'] - if not es.indices.exists(index=index_name): + if not conn.indices.exists(index=index_name): # 定义mapping结构 mapping = { "mappings": { @@ -186,33 +184,33 @@ def insert_long_text_to_es(long_text: str, tags: list = None) -> bool: } } } - es.indices.create(index=index_name, body=mapping) + conn.indices.create(index=index_name, body=mapping) print(f"索引 '{index_name}' 创建成功") - # 3. 切割文本 + # 4. 切割文本 text_chunks = split_text_into_chunks(long_text) - # 4. 准备标签 + # 5. 准备标签 if tags is None: tags = ["general_text"] - # 5. 获取当前时间 + # 6. 获取当前时间 timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - # 6. 创建嵌入模型 + # 7. 创建嵌入模型 embeddings = OpenAIEmbeddings( model=Config.EMBED_MODEL_NAME, base_url=Config.EMBED_BASE_URL, api_key=SecretStr(Config.EMBED_API_KEY) ) - # 7. 为每个文本块生成向量并插入 + # 8. 为每个文本块生成向量并插入 for i, chunk in enumerate(text_chunks): # 生成文本块的哈希值作为文档ID doc_id = hashlib.md5(chunk.encode('utf-8')).hexdigest() # 检查文档是否已存在 - if es.exists(index=index_name, id=doc_id): + if conn.exists(index=index_name, id=doc_id): print(f"文档块 {i+1} 已存在,跳过插入: {doc_id}") continue @@ -228,13 +226,17 @@ def insert_long_text_to_es(long_text: str, tags: list = None) -> bool: } # 插入数据到Elasticsearch - es.index(index=index_name, id=doc_id, document=doc) + conn.index(index=index_name, id=doc_id, document=doc) print(f"文档块 {i+1} 插入成功: {doc_id}") return True except Exception as e: print(f"插入数据失败: {e}") return False + finally: + # 确保释放连接回连接池 + if 'conn' in locals() and 'search_util' in locals(): + search_util.es_pool.release_connection(conn) diff --git a/dsSchoolBuddy/ElasticSearch/Utils/__pycache__/EsSearchUtil.cpython-310.pyc b/dsSchoolBuddy/ElasticSearch/Utils/__pycache__/EsSearchUtil.cpython-310.pyc index 8fa38d1441e8e11a017da5fe94c01df160e8758a..eefb7dbde9ef63f7711ef0473ae0a8348dade859 100644 GIT binary patch delta 998 zcmY*XO=uHQ5PtLaWwY7rZkjY{vu&!?R~ykf7i}JVlC^9=u2|9=v$y&65{Fu|t8KyWESuCSt^hN8+bf_hod4ndhHW!O-}*DFypBr8N}@5EKm4X=UNLPsC4TgWM< z33OeD+LYcZ;Am~$Tmu*ld#$GlSLF=?gBf6?Yu~JE9LnL@#41dOTZvP13CgKnE~$+< z&e+VanRTD}P~as9xa0+`BC0IV)QS=u2&ok!1zif{WhnM2*-A8$M2sy9G%=nBwCHQo zcPDa|IVQ+VY@oH|vdBXof{4`Sh$h{5`F_=vILkdYk}dXq+Wu{?9P?jJctFR-&jdYvywr# z3RS2yQCo9y>V|reacn7b?+FQLl`+gUGZN7z<4bHsC?WUr(x@~`bBqdcADx1wt`Vs( zY09XyaR0IhY~so)E@@+XS*y46m+ip6Yy*r&>A>!P%OmKpVreV6S zMU^dpXYCe}Z~t35&D7gJ~UWH<)z|3{!6BAKX;#ssI20 delta 874 zcmYjPO-NKx6u#%apLz4%yf-@I{JciX(h_kI)JAHNkc0~(!i5Nina9*gvfex~<-Qb# zc1r81q2@ zf=eE9iFJ+&stt}B>J^n%D3pm&hK;GdR@c!Gtf)qB)j@z*nAV4_HsUI!2y`u4&r3P zJxjWzriv2qJzsNKpa@=6imFgZC6op*h6cvbL<a`_1)~h}YBD)_?}oms=VvFi8US zB23(0;a-IV91sS!=NO4ND9ly1rb;2#uZUPZBjQt3h==GDEP01WeTlfxg@qk8HAoOw zR&t31c2<Io-K?Z~NBz&i$7=l^5HOUwztGKhfTu z_f%ONm%zi%EgKo{B+a`WZ$yTA2tM-3Qur=&txzSWbd`WxoYOM`4*Cx><^Wg1hM5m1 zvb~PZ9gx4V!hd>m(d%q4=u?vEV)QMSf#x|$Xq6_%y@~0ud4Fa$%()%~l}S YV7TJ8Il68t32=DpXx=r(!>4ZNFS%9HEC2ui