From f41ab11dfd6a06f823a1881c3f5783cf7d3739e0 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Mon, 23 Jun 2025 18:57:40 +0800
Subject: [PATCH] 'commit'

---
Review notes (free text in this position is ignored by `git am`):
 * T4_VectorSave.py takes its default index from ES_CONFIG["default_index"]
   instead of repeating the literal, and stores timezone-aware UTC timestamps.
 * create_vector_index() still deletes an existing index; this is now documented.
 * The password removed from T3_LinkEs.py remains in repository history and
   should be rotated.
 * The post-image blob ids on the `index` lines were not recomputed after the
   review edits.

 dsRag/Config/Config.py   |  8 ++++
 dsRag/Config/__init__.py |  0
 dsRag/T3_LinkEs.py       | 22 +++-------
 dsRag/T4_VectorSave.py   | 88 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 102 insertions(+), 16 deletions(-)
 create mode 100644 dsRag/Config/Config.py
 create mode 100644 dsRag/Config/__init__.py
 create mode 100644 dsRag/T4_VectorSave.py

diff --git a/dsRag/Config/Config.py b/dsRag/Config/Config.py
new file mode 100644
index 00000000..a93be2c4
--- /dev/null
+++ b/dsRag/Config/Config.py
@@ -0,0 +1,8 @@
+# Elasticsearch configuration
+ES_CONFIG = {
+    "hosts": "https://10.10.14.206:9200",
+    "basic_auth": ("elastic", "your_password"),
+    "verify_certs": False,
+    "ssl_show_warn": False,
+    "default_index": "knowledge_base"
+}
diff --git a/dsRag/Config/__init__.py b/dsRag/Config/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/dsRag/T3_LinkEs.py b/dsRag/T3_LinkEs.py
index 0d8b509c..9bcef1cd 100644
--- a/dsRag/T3_LinkEs.py
+++ b/dsRag/T3_LinkEs.py
@@ -1,23 +1,13 @@
+from Config.Config import ES_CONFIG
 from elasticsearch import Elasticsearch
-import ssl
 import warnings
-from urllib3.exceptions import InsecureRequestWarning
-
-# Suppress only the single InsecureRequestWarning from urllib3
-warnings.filterwarnings('ignore', category=InsecureRequestWarning)
-warnings.filterwarnings('once', category=DeprecationWarning)
-warnings.filterwarnings('once', category=UserWarning)
-
-context = ssl.create_default_context()
-context.check_hostname = False
-context.verify_mode = ssl.CERT_NONE
 
+# Initialise the ES connection
 es = Elasticsearch(
-    hosts="https://10.10.14.206:9200",
-    basic_auth=("elastic", "jv9h8uwRrRxmDi1dq6u8"),
-    ssl_context=context,
-    verify_certs=False,
-    ssl_show_warn=False  # This will suppress the Elasticsearch warning
+    hosts=ES_CONFIG["hosts"],
+    basic_auth=ES_CONFIG["basic_auth"],
+    verify_certs=ES_CONFIG["verify_certs"],
+    ssl_show_warn=ES_CONFIG["ssl_show_warn"]
 )
 
 try:
diff --git a/dsRag/T4_VectorSave.py b/dsRag/T4_VectorSave.py
new file mode 100644
index 00000000..e66fb5e9
--- /dev/null
+++ b/dsRag/T4_VectorSave.py
@@ -0,0 +1,88 @@
+from Config.Config import ES_CONFIG
+from elasticsearch import Elasticsearch
+from T2_Txt2Vec import text_to_embedding
+import datetime
+import warnings
+
+# Shared Elasticsearch client, configured from Config/Config.py
+es = Elasticsearch(
+    hosts=ES_CONFIG["hosts"],
+    basic_auth=ES_CONFIG["basic_auth"],
+    verify_certs=ES_CONFIG["verify_certs"],
+    ssl_show_warn=ES_CONFIG["ssl_show_warn"]
+)
+
+
+def create_vector_index(index_name=ES_CONFIG["default_index"]):
+    """Create an index with a text field, a dense-vector field and a timestamp.
+
+    WARNING: if ``index_name`` already exists it is deleted first, so every
+    document stored in it is lost. Run this once, before the first save.
+
+    Args:
+        index_name: Index to (re)create. Defaults to
+            ``ES_CONFIG["default_index"]``.
+
+    Raises:
+        Exception: Whatever the Elasticsearch calls raise; printed, then re-raised.
+    """
+    mapping = {
+        "mappings": {
+            "properties": {
+                "text": {"type": "text", "analyzer": "ik_max_word"},
+                "vector": {
+                    "type": "dense_vector",
+                    "dims": 768,  # must match the text2vec model's dimension
+                    "index": True,
+                    "similarity": "cosine"
+                },
+                "timestamp": {"type": "date"}
+            }
+        }
+    }
+
+    try:
+        if es.indices.exists(index=index_name):
+            es.indices.delete(index=index_name)
+        es.indices.create(index=index_name, body=mapping)
+        print(f"索引 {index_name} 创建成功")
+    except Exception as e:
+        print(f"创建索引失败: {str(e)}")
+        raise
+
+
+def save_to_es(text, index_name=ES_CONFIG["default_index"]):
+    """Embed ``text`` and index it together with its vector and a timestamp.
+
+    Args:
+        text: Text to embed with ``text_to_embedding`` and store.
+        index_name: Target index. Defaults to ``ES_CONFIG["default_index"]``.
+
+    Returns:
+        The response returned by ``es.index``.
+
+    Raises:
+        Exception: Whatever ``es.index`` raises; printed, then re-raised.
+    """
+    vector = text_to_embedding(text)
+    doc = {
+        "text": text,
+        "vector": vector,
+        # Timezone-aware UTC: a naive local time carries no offset, so
+        # Elasticsearch would read it as UTC and shift it on non-UTC hosts.
+        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat()
+    }
+
+    try:
+        res = es.index(index=index_name, document=doc)
+        print(f"文档已保存,ID: {res['_id']}")
+        return res
+    except Exception as e:
+        print(f"保存到ES失败: {str(e)}")
+        raise
+
+
+# Usage example
+if __name__ == "__main__":
+    create_vector_index()  # run once before the first save; drops an existing index
+    save_to_es("如何更换支付宝绑定银行卡")