# NOTE: This text is a git diff (patch), not an importable Python module, and its
# line breaks have been collapsed: each physical line below holds many patch
# lines joined by single spaces. The patch payload is left untouched.
#
# Files touched by the patch:
#   * dsRag/T4_ManageMapping.py
#       - manage_index() gains an index_type parameter and index_name now
#         defaults to None; when None it resolves to "knowledge_base" for
#         index_type == "vector" and to "raw_texts" otherwise.
#       - "create" dispatches to create_vector_index(index_name, dims) or
#         create_text_index(index_name); "delete" dispatches to
#         delete_index(index_name) or delete_text_index(index_name).
#       - Any other action raises ValueError.
#       - The __main__ example deletes both indices, then creates both.
#   * dsRag/T5_VectorSave.py
#       - "import datetime" becomes "from datetime import datetime".
#       - Adds save_vector(text, index_name="knowledge_base"): embeds the text
#         with text_to_embedding(), indexes {"text", "vector", "timestamp"}
#         through es.index() and returns res['_id'].
#       - Adds save_raw_text(text, index_name="raw_texts"): indexes
#         {"text", "timestamp"} and returns res['_id'].
#       - Both helpers print the error and re-raise on any Exception.
#   * dsRag/Util/EsMappingUtil.py
#       - Adds create_text_index(index_name="raw_texts"): creates the index
#         with a "text" field (analyzer "ik_max_word", search_analyzer
#         "ik_smart") and a "timestamp" date field; returns True when created,
#         False when the index already exists, and re-raises on error.
#       - Adds delete_text_index(index_name="raw_texts"): returns True when the
#         index was deleted, False when it did not exist, re-raises on error.
#
# NOTE(review): manage_index() treats every index_type other than "vector" as
#   "text"; a misspelled type is not rejected the way a bad action is.
# NOTE(review): "timestamp" is filled from datetime.now(), which is a naive
#   (timezone-less) value.
# NOTE(review): create_text_index() passes the mapping via body= while the save
#   helpers call es.index(..., document=...); confirm that both keyword styles
#   are supported by the installed elasticsearch client version.
# NOTE(review): delete_text_index() performs the same exists()/delete() calls a
#   generic index delete would; it presumably duplicates delete_index(), whose
#   full body is not visible in this patch -- confirm before merging.
# NOTE(review): the ik_max_word / ik_smart analyzers presumably require the IK
#   analysis plugin on the cluster -- TODO confirm.
diff --git a/dsRag/T4_ManageMapping.py b/dsRag/T4_ManageMapping.py index 6a61336e..f7015797 100644 --- a/dsRag/T4_ManageMapping.py +++ b/dsRag/T4_ManageMapping.py @@ -1,7 +1,7 @@ from elasticsearch import Elasticsearch from Config.Config import ES_CONFIG -from Util.EsMappingUtil import create_vector_index, delete_index # 导入工具函数 +from Util.EsMappingUtil import create_vector_index, delete_index, create_text_index, delete_text_index # 初始化ES连接 es = Elasticsearch( @@ -11,22 +11,37 @@ es = Elasticsearch( ssl_show_warn=ES_CONFIG["ssl_show_warn"] ) -def manage_index(action, index_name="knowledge_base", dims=200): +def manage_index(action, index_type="vector", index_name=None, dims=200): """管理Elasticsearch索引 :param action: 'create'或'delete' - :param index_name: 索引名称 - :param dims: 向量维度(仅创建时有效) + :param index_type: 'vector'或'text' + :param index_name: 索引名称(默认根据类型自动生成) + :param dims: 向量维度(仅向量索引有效) """ + if index_name is None: + index_name = "knowledge_base" if index_type == "vector" else "raw_texts" + if action == "create": - return create_vector_index(index_name, dims) + if index_type == "vector": + return create_vector_index(index_name, dims) + else: + return create_text_index(index_name) elif action == "delete": - return delete_index(index_name) + if index_type == "vector": + return delete_index(index_name) + else: + return delete_text_index(index_name) else: raise ValueError("action参数必须是'create'或'delete'") # 使用示例 if __name__ == "__main__": - # 删除索引 - manage_index("delete") - # 创建索引 - manage_index("create", dims=200) + # 先删除现有索引(如果存在) + manage_index("delete", "vector") + manage_index("delete", "text") + + # 创建新的向量索引 + manage_index("create", "vector", dims=200) + + # 创建新的原始文本索引 + manage_index("create", "text") diff --git a/dsRag/T5_VectorSave.py b/dsRag/T5_VectorSave.py index 063aabb7..31dd2ad8 100644 --- a/dsRag/T5_VectorSave.py +++ b/dsRag/T5_VectorSave.py @@ -1,10 +1,9 @@ -import datetime - +from datetime import datetime from elasticsearch import Elasticsearch from 
Config.Config import ES_CONFIG from T2_Txt2Vec import text_to_embedding -from Util.EsMappingUtil import create_vector_index # 导入工具函数 +from Util.EsMappingUtil import create_vector_index # 初始化ES连接 es = Elasticsearch( @@ -14,7 +13,54 @@ es = Elasticsearch( ssl_show_warn=ES_CONFIG["ssl_show_warn"] ) +def save_vector(text, index_name="knowledge_base"): + """将文本向量化后保存到ES""" + try: + # 向量化文本 + vector = text_to_embedding(text) + + # 准备文档 + doc = { + "text": text, + "vector": vector, + "timestamp": datetime.now() + } + + # 保存到ES + res = es.index(index=index_name, document=doc) + print(f"向量化文档已保存,ID: {res['_id']}") + return res['_id'] + except Exception as e: + print(f"保存失败: {str(e)}") + raise + +def save_raw_text(text, index_name="raw_texts"): + """保存原始文本到ES""" + try: + # 准备文档 + doc = { + "text": text, + "timestamp": datetime.now() + } + + # 保存到ES + res = es.index(index=index_name, document=doc) + print(f"原始文本已保存,ID: {res['_id']}") + return res['_id'] + except Exception as e: + print(f"保存失败: {str(e)}") + raise # 使用示例 if __name__ == "__main__": - create_vector_index(dims=200) # 使用工具函数创建索引 + # 创建向量索引 + create_vector_index(dims=200) + + # 示例文本 + sample_text = "如何更换支付宝绑定银行卡" + + # 保存向量化文档 + save_vector(sample_text) + + # 保存原始文本 + save_raw_text(sample_text) diff --git a/dsRag/Util/EsMappingUtil.py b/dsRag/Util/EsMappingUtil.py index 65dd4548..9dbf66cf 100644 --- a/dsRag/Util/EsMappingUtil.py +++ b/dsRag/Util/EsMappingUtil.py @@ -61,4 +61,49 @@ def delete_index(index_name): return False except Exception as e: print(f"删除索引失败: {str(e)}") + raise + + +def create_text_index(index_name="raw_texts"): + """创建原始文本索引""" + mapping = { + "mappings": { + "properties": { + "text": { + "type": "text", + "analyzer": "ik_max_word", + "search_analyzer": "ik_smart" + }, + "timestamp": { + "type": "date" + } + } + } + } + + try: + if not es.indices.exists(index=index_name): + es.indices.create(index=index_name, body=mapping) + print(f"原始文本索引 {index_name} 创建成功") + return True + else: + 
print(f"原始文本索引 {index_name} 已存在") + return False + except Exception as e: + print(f"创建原始文本索引失败: {str(e)}") + raise + + +def delete_text_index(index_name="raw_texts"): + """删除原始文本索引""" + try: + if es.indices.exists(index=index_name): + es.indices.delete(index=index_name) + print(f"原始文本索引 {index_name} 删除成功") + return True + else: + print(f"原始文本索引 {index_name} 不存在") + return False + except Exception as e: + print(f"删除原始文本索引失败: {str(e)}") raise \ No newline at end of file