from datetime import datetime, timezone

from elasticsearch import Elasticsearch

from Config.Config import ES_CONFIG
from T2_Txt2Vec import text_to_embedding
from Util.EsMappingUtil import create_vector_index

# Initialize the Elasticsearch connection from the project configuration.
es = Elasticsearch(
    hosts=ES_CONFIG["hosts"],
    basic_auth=ES_CONFIG["basic_auth"],
    verify_certs=ES_CONFIG["verify_certs"],
    ssl_show_warn=ES_CONFIG["ssl_show_warn"],
)


def _utc_now():
    """Return the current time as a timezone-aware UTC datetime.

    A naive ``datetime.now()`` is serialized without a UTC offset, and
    Elasticsearch interprets offset-less dates as UTC, which silently skews
    the stored timestamp by the host's local UTC offset.
    """
    return datetime.now(timezone.utc)


def _index_document(doc, index_name, success_label):
    """Index *doc* into *index_name*, print a confirmation, return the new ID.

    Args:
        doc: The document body passed to ``es.index``.
        index_name: Name of the target index.
        success_label: Text printed in front of the generated document ID.

    Returns:
        The ``_id`` value from the ``es.index`` response.
    """
    res = es.index(index=index_name, document=doc)
    doc_id = res["_id"]
    print(f"{success_label},ID: {doc_id}")
    return doc_id


def save_vector(text, index_name="knowledge_base"):
    """Embed *text* and store the text, its vector, and a timestamp in ES.

    Args:
        text: The text to embed with ``text_to_embedding`` and store.
        index_name: Target index; defaults to ``"knowledge_base"``.

    Returns:
        The ``_id`` of the indexed document.

    Raises:
        Exception: Any error raised while embedding or indexing is printed
            and then re-raised unchanged.
    """
    try:
        # Embed first so a vectorization failure never produces a partial doc.
        vector = text_to_embedding(text)
        doc = {
            "text": text,
            "vector": vector,
            "timestamp": _utc_now(),
        }
        return _index_document(doc, index_name, "向量化文档已保存")
    except Exception as e:
        print(f"保存失败: {e!s}")
        raise


def save_raw_text(text, index_name="raw_texts"):
    """Store *text* and a timestamp in ES without computing an embedding.

    Args:
        text: The raw text to store.
        index_name: Target index; defaults to ``"raw_texts"``.

    Returns:
        The ``_id`` of the indexed document.

    Raises:
        Exception: Any error raised while indexing is printed and then
            re-raised unchanged.
    """
    try:
        doc = {
            "text": text,
            "timestamp": _utc_now(),
        }
        return _index_document(doc, index_name, "原始文本已保存")
    except Exception as e:
        print(f"保存失败: {e!s}")
        raise


# Usage example
if __name__ == "__main__":
    # Create the vector index.
    # NOTE(review): dims=200 presumably has to match the length of the vectors
    # returned by text_to_embedding — confirm against the embedding model.
    create_vector_index(dims=200)

    # Sample text
    sample_text = "如何更换支付宝绑定银行卡"

    # Save the vectorized document
    save_vector(sample_text)

    # Save the raw text
    save_raw_text(sample_text)