diff --git a/dsRag/Config/Config.py b/dsRag/Config/Config.py index 33184b88..51186dc2 100644 --- a/dsRag/Config/Config.py +++ b/dsRag/Config/Config.py @@ -11,7 +11,7 @@ ES_CONFIG = { "basic_auth": ("elastic", "jv9h8uwRrRxmDi1dq6u8"), "verify_certs": False, "ssl_show_warn": False, - "default_index": "knowledge_base" + "index_name": "knowledge_base" } diff --git a/dsRag/Config/__pycache__/Config.cpython-310.pyc b/dsRag/Config/__pycache__/Config.cpython-310.pyc index 93fec986..1aa3805a 100644 Binary files a/dsRag/Config/__pycache__/Config.cpython-310.pyc and b/dsRag/Config/__pycache__/Config.cpython-310.pyc differ diff --git a/dsRag/ElasticSearch/T2_CreatingMapping.py b/dsRag/ElasticSearch/T2_CreatingMapping.py index 532d9cd8..dfbb824f 100644 --- a/dsRag/ElasticSearch/T2_CreatingMapping.py +++ b/dsRag/ElasticSearch/T2_CreatingMapping.py @@ -31,7 +31,7 @@ mapping = { } # 创建索引 -index_name = Config.ES_CONFIG['default_index'] +index_name = Config.ES_CONFIG['index_name'] if es.indices.exists(index=index_name): es.indices.delete(index=index_name) print(f"删除已存在的索引 '{index_name}'") diff --git a/dsRag/ElasticSearch/T4_InsertData.py b/dsRag/ElasticSearch/T4_InsertData.py index 29db6ea6..af689afe 100644 --- a/dsRag/ElasticSearch/T4_InsertData.py +++ b/dsRag/ElasticSearch/T4_InsertData.py @@ -63,7 +63,7 @@ for filename in os.listdir(txt_dir): 'timestamp': timestamp, 'embedding': embedding } - es.index(index=ES_CONFIG['default_index'], document=doc) + es.index(index=ES_CONFIG['index_name'], document=doc) print(f"文件 {filename} 数据插入成功") print("所有文件处理完成") \ No newline at end of file diff --git a/dsRag/ElasticSearch/T5_select_all_data.py b/dsRag/ElasticSearch/T5_select_all_data.py new file mode 100644 index 00000000..b2623faa --- /dev/null +++ b/dsRag/ElasticSearch/T5_select_all_data.py @@ -0,0 +1,49 @@ +from elasticsearch import Elasticsearch +import warnings +from Config import Config +from Config.Config import ES_CONFIG + +# 1. 
# Initialize the Elasticsearch connection.
# DeprecationWarning output is suppressed for the whole script.
warnings.filterwarnings('ignore', category=DeprecationWarning)
es = Elasticsearch(
    hosts=ES_CONFIG['hosts'],
    basic_auth=ES_CONFIG['basic_auth'],
    # Follow the shared config instead of a hard-coded literal; falls back to
    # the previous value (False) when the key is absent.
    verify_certs=ES_CONFIG.get('verify_certs', False),
)

# 2. Tag to look up; edit this value to inspect a different tag.
query_tag = "MATH_DATA_1"

# Upper bound on the number of hits requested by the single search call below.
max_hits = 1000

# 3. Build the query: exact-match (term) filter on the `tags` field.
# NOTE(review): the filter targets the top-level `tags` field, while the
# printing code historically read `doc['tags']['tags']`. Confirm against the
# index mapping whether the field to filter on should be `tags.tags`.
query = {
    "query": {
        "bool": {
            "must": [
                {"term": {"tags": query_tag}},
            ]
        }
    },
    "size": max_hits,
}


def _tag_values(source):
    """Return the tag value(s) stored in a hit's ``_source`` dict.

    When ``source['tags']`` is itself a dict, its inner ``'tags'`` entry is
    returned (the shape the original ``source['tags']['tags']`` lookup
    expected). Any other shape (string, list, missing) is returned as-is
    instead of raising, so one unexpected document cannot abort the listing.
    """
    tags = source.get('tags')
    if isinstance(tags, dict):
        return tags.get('tags')
    return tags


# 4. Run the query. Only the network call sits inside the ``try`` so that a
# formatting problem is never reported as a failed query.
try:
    results = es.search(index=ES_CONFIG['index_name'], body=query)
except Exception as e:  # top-level script boundary: report and stop
    print(f"查询失败: {e}")
else:
    hits = results['hits']['hits']
    print(f"查询标签 '{query_tag}' 结果:")
    if not hits:
        print(f"未找到标签为 '{query_tag}' 的数据。")
    for hit in hits:
        doc = hit.get('_source', {})
        # Missing or empty embeddings print as an empty preview.
        embedding = doc.get('embedding') or []
        print(f"ID: {hit['_id']}")
        print(f"标签: {_tag_values(doc)}")
        print(f"用户问题: {doc.get('user_input')}")
        print(f"时间: {doc.get('timestamp')}")
        print(f"向量: {embedding[:5]}...")
        print("-" * 40)