From e40718658beaa1047aa7f6c650a27ce9c3058d61 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Fri, 27 Jun 2025 11:29:07 +0800 Subject: [PATCH] 'commit' --- dsRag/Config/Config.py | 2 +- .../Config/__pycache__/Config.cpython-310.pyc | Bin 1000 -> 997 bytes dsRag/ElasticSearch/T2_CreatingMapping.py | 2 +- dsRag/ElasticSearch/T4_InsertData.py | 2 +- dsRag/ElasticSearch/T5_select_all_data.py | 49 ++++++++++++++++++ 5 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 dsRag/ElasticSearch/T5_select_all_data.py diff --git a/dsRag/Config/Config.py b/dsRag/Config/Config.py index 33184b88..51186dc2 100644 --- a/dsRag/Config/Config.py +++ b/dsRag/Config/Config.py @@ -11,7 +11,7 @@ ES_CONFIG = { "basic_auth": ("elastic", "jv9h8uwRrRxmDi1dq6u8"), "verify_certs": False, "ssl_show_warn": False, - "default_index": "knowledge_base" + "index_name": "knowledge_base" } diff --git a/dsRag/Config/__pycache__/Config.cpython-310.pyc b/dsRag/Config/__pycache__/Config.cpython-310.pyc index 93fec98664a40a3abdb9b05c69dd304ba350214b..1aa3805ada5333d0cb7209d3c3de6d9ca25661a1 100644 GIT binary patch delta 39 tcmaFC{*;|JpO=@50SH#}$7Mur{My3l;zX delta 42 wcmaFL{(_x1pO=@50SH`p;xb}3@|rX9@usAvC6?xt#AoKEq*iS9XPm|a0O$D&_5c6? diff --git a/dsRag/ElasticSearch/T2_CreatingMapping.py b/dsRag/ElasticSearch/T2_CreatingMapping.py index 532d9cd8..dfbb824f 100644 --- a/dsRag/ElasticSearch/T2_CreatingMapping.py +++ b/dsRag/ElasticSearch/T2_CreatingMapping.py @@ -31,7 +31,7 @@ mapping = { } # 创建索引 -index_name = Config.ES_CONFIG['default_index'] +index_name = Config.ES_CONFIG['index_name'] if es.indices.exists(index=index_name): es.indices.delete(index=index_name) print(f"删除已存在的索引 '{index_name}'") diff --git a/dsRag/ElasticSearch/T4_InsertData.py b/dsRag/ElasticSearch/T4_InsertData.py index 29db6ea6..af689afe 100644 --- a/dsRag/ElasticSearch/T4_InsertData.py +++ b/dsRag/ElasticSearch/T4_InsertData.py @@ -63,7 +63,7 @@ for filename in os.listdir(txt_dir): 'timestamp': timestamp, 'embedding': embedding } - es.index(index=ES_CONFIG['default_index'], document=doc) + es.index(index=ES_CONFIG['index_name'], document=doc) print(f"文件 {filename} 数据插入成功") print("所有文件处理完成") \ No newline at end of file diff --git a/dsRag/ElasticSearch/T5_select_all_data.py b/dsRag/ElasticSearch/T5_select_all_data.py new file mode 100644 index 00000000..b2623faa --- /dev/null +++ b/dsRag/ElasticSearch/T5_select_all_data.py @@ -0,0 +1,49 @@ +from elasticsearch import Elasticsearch +import warnings +from Config import Config +from Config.Config import ES_CONFIG + +# 1. 初始化Elasticsearch连接 +warnings.filterwarnings('ignore', category=DeprecationWarning) +es = Elasticsearch( + hosts=Config.ES_CONFIG['hosts'], + basic_auth=Config.ES_CONFIG['basic_auth'], + verify_certs=False +) + +# 2. 直接在代码中指定要查询的标签 +query_tag = "MATH_DATA_1" # 可以修改为其他需要的标签 + +# 3. 构建查询条件 +query = { + "query": { + "bool": { + "must": [ + { + "term": { + "tags": query_tag + } + } + ] + } + }, + "size": 1000 +} + +# 4. 执行查询 +try: + results = es.search(index=ES_CONFIG['index_name'], body=query) + print(f"查询标签 '{query_tag}' 结果:") + if results['hits']['hits']: + for hit in results['hits']['hits']: + doc = hit['_source'] + print(f"ID: {hit['_id']}") + print(f"标签: {doc['tags']['tags']}") + print(f"用户问题: {doc['user_input']}") + print(f"时间: {doc['timestamp']}") + print(f"向量: {doc['embedding'][:5]}...") + print("-" * 40) + else: + print(f"未找到标签为 '{query_tag}' 的数据。") +except Exception as e: + print(f"查询失败: {e}") \ No newline at end of file