diff --git a/dsRag/T4_ManageMapping.py b/dsRag/T4_ManageMapping.py
new file mode 100644
index 00000000..6a61336e
--- /dev/null
+++ b/dsRag/T4_ManageMapping.py
@@ -0,0 +1,32 @@
+from elasticsearch import Elasticsearch
+
+from Config.Config import ES_CONFIG
+from Util.EsMappingUtil import create_vector_index, delete_index # 导入工具函数
+
+# 初始化ES连接
+es = Elasticsearch(
+    hosts=ES_CONFIG["hosts"],
+    basic_auth=ES_CONFIG["basic_auth"],
+    verify_certs=ES_CONFIG["verify_certs"],
+    ssl_show_warn=ES_CONFIG["ssl_show_warn"]
+)
+
+def manage_index(action, index_name="knowledge_base", dims=200):
+    """管理Elasticsearch索引
+    :param action: 'create'或'delete'
+    :param index_name: 索引名称
+    :param dims: 向量维度(仅创建时有效)
+    """
+    if action == "create":
+        return create_vector_index(index_name, dims)
+    elif action == "delete":
+        return delete_index(index_name)
+    else:
+        raise ValueError("action参数必须是'create'或'delete'")
+
+# 使用示例
+if __name__ == "__main__":
+    # 删除索引
+    manage_index("delete")
+    # 创建索引
+    manage_index("create", dims=200)
diff --git a/dsRag/T4_VectorSave.py b/dsRag/T4_VectorSave.py
deleted file mode 100644
index 712df513..00000000
--- a/dsRag/T4_VectorSave.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import datetime
-
-from elasticsearch import Elasticsearch
-
-from Config.Config import ES_CONFIG
-from T2_Txt2Vec import text_to_embedding
-from Util.EsMappingUtil import create_vector_index # 导入工具函数
-
-# 初始化ES连接
-es = Elasticsearch(
-    hosts=ES_CONFIG["hosts"],
-    basic_auth=ES_CONFIG["basic_auth"],
-    verify_certs=ES_CONFIG["verify_certs"],
-    ssl_show_warn=ES_CONFIG["ssl_show_warn"]
-)
-
-def save_to_es(text, index_name="knowledge_base"):
-    """将文本向量化后保存到ES"""
-    # 检查是否已存在相同文本
-    query = {
-        "query": {
-            "term": {
-                "text.keyword": {
-                    "value": text
-                }
-            }
-        }
-    }
-
-    exists = es.search(index=index_name, body=query)
-    if exists["hits"]["total"]["value"] > 0:
-        print(f"文档已存在,跳过保存: {text}")
-        return exists["hits"]["hits"][0]["_id"] # 返回现有文档ID
-
-    # 保存新文档
-    vector = text_to_embedding(text)
-    doc = {
-        "text": text,
-        "vector": vector,
-        "timestamp": datetime.datetime.now().isoformat()
-    }
-
-    try:
-        res = es.index(index=index_name, document=doc)
-        print(f"文档已保存,ID: {res['_id']}")
-        return res["_id"]
-    except Exception as e:
-        print(f"保存到ES失败: {str(e)}")
-        raise
-
-# 使用示例
-if __name__ == "__main__":
-    create_vector_index(dims=200) # 使用工具函数创建索引
-    save_to_es("如何更换支付宝绑定银行卡")
\ No newline at end of file
diff --git a/dsRag/T5_VectorSave.py b/dsRag/T5_VectorSave.py
new file mode 100644
index 00000000..063aabb7
--- /dev/null
+++ b/dsRag/T5_VectorSave.py
@@ -0,0 +1,20 @@
+import datetime
+
+from elasticsearch import Elasticsearch
+
+from Config.Config import ES_CONFIG
+from T2_Txt2Vec import text_to_embedding
+from Util.EsMappingUtil import create_vector_index # 导入工具函数
+
+# 初始化ES连接
+es = Elasticsearch(
+    hosts=ES_CONFIG["hosts"],
+    basic_auth=ES_CONFIG["basic_auth"],
+    verify_certs=ES_CONFIG["verify_certs"],
+    ssl_show_warn=ES_CONFIG["ssl_show_warn"]
+)
+
+
+# 使用示例
+if __name__ == "__main__":
+    create_vector_index(dims=200) # 使用工具函数创建索引
diff --git a/dsRag/Util/EsMappingUtil.py b/dsRag/Util/EsMappingUtil.py
index 7aa01d88..65dd4548 100644
--- a/dsRag/Util/EsMappingUtil.py
+++ b/dsRag/Util/EsMappingUtil.py
@@ -46,4 +46,19 @@ def create_vector_index(index_name="knowledge_base", dims=200):
         return True
     except Exception as e:
         print(f"操作索引失败: {str(e)}")
+        raise
+
+
+def delete_index(index_name):
+    """删除Elasticsearch索引"""
+    try:
+        if es.indices.exists(index=index_name):
+            es.indices.delete(index=index_name)
+            print(f"索引 {index_name} 删除成功")
+            return True
+        else:
+            print(f"索引 {index_name} 不存在")
+            return False
+    except Exception as e:
+        print(f"删除索引失败: {str(e)}")
         raise
\ No newline at end of file
diff --git a/dsRag/Util/__pycache__/EsMappingUtil.cpython-310.pyc b/dsRag/Util/__pycache__/EsMappingUtil.cpython-310.pyc
new file mode 100644
index 00000000..ca99ac22
Binary files /dev/null and b/dsRag/Util/__pycache__/EsMappingUtil.cpython-310.pyc differ