From a145af2490e210aaadc334184e43e7beaf59bf56 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Mon, 23 Jun 2025 19:26:20 +0800 Subject: [PATCH] 'commit' --- dsRag/T4_ManageMapping.py | 32 +++++++++++ dsRag/T4_VectorSave.py | 54 ------------------ dsRag/T5_VectorSave.py | 20 +++++++ dsRag/Util/EsMappingUtil.py | 15 +++++ .../__pycache__/EsMappingUtil.cpython-310.pyc | Bin 0 -> 1930 bytes 5 files changed, 67 insertions(+), 54 deletions(-) create mode 100644 dsRag/T4_ManageMapping.py delete mode 100644 dsRag/T4_VectorSave.py create mode 100644 dsRag/T5_VectorSave.py create mode 100644 dsRag/Util/__pycache__/EsMappingUtil.cpython-310.pyc diff --git a/dsRag/T4_ManageMapping.py b/dsRag/T4_ManageMapping.py new file mode 100644 index 00000000..6a61336e --- /dev/null +++ b/dsRag/T4_ManageMapping.py @@ -0,0 +1,32 @@ +from elasticsearch import Elasticsearch + +from Config.Config import ES_CONFIG +from Util.EsMappingUtil import create_vector_index, delete_index # 导入工具函数 + +# 初始化ES连接 +es = Elasticsearch( + hosts=ES_CONFIG["hosts"], + basic_auth=ES_CONFIG["basic_auth"], + verify_certs=ES_CONFIG["verify_certs"], + ssl_show_warn=ES_CONFIG["ssl_show_warn"] +) + +def manage_index(action, index_name="knowledge_base", dims=200): + """管理Elasticsearch索引 + :param action: 'create'或'delete' + :param index_name: 索引名称 + :param dims: 向量维度(仅创建时有效) + """ + if action == "create": + return create_vector_index(index_name, dims) + elif action == "delete": + return delete_index(index_name) + else: + raise ValueError("action参数必须是'create'或'delete'") + +# 使用示例 +if __name__ == "__main__": + # 删除索引 + manage_index("delete") + # 创建索引 + manage_index("create", dims=200) diff --git a/dsRag/T4_VectorSave.py b/dsRag/T4_VectorSave.py deleted file mode 100644 index 712df513..00000000 --- a/dsRag/T4_VectorSave.py +++ /dev/null @@ -1,54 +0,0 @@ -import datetime - -from elasticsearch import Elasticsearch - -from Config.Config import ES_CONFIG -from T2_Txt2Vec import text_to_embedding -from Util.EsMappingUtil import create_vector_index # 导入工具函数 - -# 初始化ES连接 -es = Elasticsearch( - hosts=ES_CONFIG["hosts"], - basic_auth=ES_CONFIG["basic_auth"], - verify_certs=ES_CONFIG["verify_certs"], - ssl_show_warn=ES_CONFIG["ssl_show_warn"] -) - -def save_to_es(text, index_name="knowledge_base"): - """将文本向量化后保存到ES""" - # 检查是否已存在相同文本 - query = { - "query": { - "term": { - "text.keyword": { - "value": text - } - } - } - } - - exists = es.search(index=index_name, body=query) - if exists["hits"]["total"]["value"] > 0: - print(f"文档已存在,跳过保存: {text}") - return exists["hits"]["hits"][0]["_id"] # 返回现有文档ID - - # 保存新文档 - vector = text_to_embedding(text) - doc = { - "text": text, - "vector": vector, - "timestamp": datetime.datetime.now().isoformat() - } - - try: - res = es.index(index=index_name, document=doc) - print(f"文档已保存,ID: {res['_id']}") - return res["_id"] - except Exception as e: - print(f"保存到ES失败: {str(e)}") - raise - -# 使用示例 -if __name__ == "__main__": - create_vector_index(dims=200) # 使用工具函数创建索引 - save_to_es("如何更换支付宝绑定银行卡") \ No newline at end of file diff --git a/dsRag/T5_VectorSave.py b/dsRag/T5_VectorSave.py new file mode 100644 index 00000000..063aabb7 --- /dev/null +++ b/dsRag/T5_VectorSave.py @@ -0,0 +1,20 @@ +import datetime + +from elasticsearch import Elasticsearch + +from Config.Config import ES_CONFIG +from T2_Txt2Vec import text_to_embedding +from Util.EsMappingUtil import create_vector_index # 导入工具函数 + +# 初始化ES连接 +es = Elasticsearch( + hosts=ES_CONFIG["hosts"], + basic_auth=ES_CONFIG["basic_auth"], + verify_certs=ES_CONFIG["verify_certs"], + ssl_show_warn=ES_CONFIG["ssl_show_warn"] +) + + +# 使用示例 +if __name__ == "__main__": + create_vector_index(dims=200) # 使用工具函数创建索引 diff --git a/dsRag/Util/EsMappingUtil.py b/dsRag/Util/EsMappingUtil.py index 7aa01d88..65dd4548 100644 --- a/dsRag/Util/EsMappingUtil.py +++ b/dsRag/Util/EsMappingUtil.py @@ -46,4 +46,19 @@ def create_vector_index(index_name="knowledge_base", dims=200): return True except Exception as e: print(f"操作索引失败: {str(e)}") + raise + + +def delete_index(index_name): + """删除Elasticsearch索引""" + try: + if es.indices.exists(index=index_name): + es.indices.delete(index=index_name) + print(f"索引 {index_name} 删除成功") + return True + else: + print(f"索引 {index_name} 不存在") + return False + except Exception as e: + print(f"删除索引失败: {str(e)}") raise \ No newline at end of file diff --git a/dsRag/Util/__pycache__/EsMappingUtil.cpython-310.pyc b/dsRag/Util/__pycache__/EsMappingUtil.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ca99ac228af4900e552f1268a17f8977d8054120 GIT binary patch literal 1930 zcmZ8i-ESL35Z~SV@Y%7QuTt6)i2wz~FXaJVh!8?bQ^W%Wkyb5MCF|c$#N`GkKPGFOYnK_l%KT|2lXlIT~h0OSb1bMNinWI01s> z=vZOo?fv`Sd?Pd_T+jEy)KS}WiUm9HC&Ki!kj}AMp(rG1GM-l|coXhSVaAq~oGx{6 zDh(ub{uu^IgtVp={3my9krMQx=|Ym#f#I;4%w3+20|5zqoh! z2=@-TateOlk?thi3*5W*_>qIYQ#vy0y%lGMr#;nL*f%cxc65-2N*b^8b^u`-qZVb1 zQT*tSOjG}hJ=Pz~CqNlyrz-ADN$_z|fR2hH17YXEb|EfRPksl+Cv|#~&a=6LKAmKf z8gzv=sNAbgdVHGK>L3TqG57)+5sOO(H@JC~&Fgi&PN$fB$v1hTPOcDc)hW>AqHz+? ze2DPW7y(_E(xvM<=rYP8yO?NObb|%n4)50)?`bfKZ^Yhu+qRp$>BP5cE+z8@C37jT z?DLa#&|e{o3_X58c>la{+?b&=B)2Vi2GWd9U5W1AiEb~qnx|t#MJtzDH+}&`#n8G= zWjEN#BoeHU>_wuwh(P`mF&>)^2D^YCjBfoLt$Z0Ze|T{Fv(>vdqdUv3mG4%s-WUq7 z>mbQJymz*BX=%NAqII$H;C}N!&;yFKmF4KdnTPi-cbx6s>g@hUv9kxa8`0Tc*FS6w zMfWd8ryIjTCUy&6!R6ZW`@;c_AzrjE$5iy8e$?f)0D~TkGj5$aAD#JTDA)#~IEwh% z=-N+fH@_bq+@DLupkXjK34ps(1jZDz4sL`l#9$kIp$3Q0s7j~ehh|X<7y(!!vqe$$ z9k&u{o-cD|m{KECsMuwpiWer@@8`RVfs~@M;V_@>2-K`m@!zya3^(kLFX;?{ay$SE z4=ZK?`m;1k2dKriQKrrxSZz^jLsbW;cBICJ)EpxKN~1OmpacK{D0QCjO&qOFptR=cajHO>40gih z(W#~NudZ!b%yD<*lc1G2>8k0_OEipoc_6?91`!?#efv~zV~hXyHX#wI@+l-(xzOaI zBzz&CMhVV^#6=6}c@FvKk!-B2xCyW(@r{D2{>xwxFCbO>9;kZ{)O}OMan0|C?T33G z+-^sYinqdy*!&M9u@zxv#H}22#`nZOQbThUGA=UJK8YST1*O7M@UUJjyF4g~*Kj;N WyeUo6uoO**N_`%s5~<$wj(-80CN>iQ literal 0 HcmV?d00001