'commit'

4 weeks ago · 9e82dc00ce
parent 9e0e39a6fc
commit 9e82dc00ce
4 changed files with 490 additions and 665 deletions
--- a/dsRag/ElasticSearch/T4_SelectAllData.py
+++ b/dsRag/ElasticSearch/T4_SelectAllData.py
@@ -16,11 +16,12 @@ es = Elasticsearch(
 def select_all_data(index_name):
    try:
        # 构建查询条件 - 匹配所有文档
+        # 修改查询条件为获取前10条数据
        query = {
            "query": {
                "match_all": {}
            },
-            "size": 1000  # 限制返回结果数量
+            "size": 10  # 仅获取10条数据
        }
        
        # 执行查询
@@ -36,6 +37,12 @@ def select_all_data(index_name):
                print(f"   内容: {hit['_source'].get('user_input', '')}")
                print(f"   标签: {hit['_source'].get('tags', '')}")
                print(f"   时间戳: {hit['_source'].get('timestamp', '')}")
+                # 在循环中添加打印完整数据结构的代码
+                if i == 1:  # 只打印第一条数据的完整结构
+                    print("完整数据结构:")
+                    import pprint
+                    pp = pprint.PrettyPrinter(indent=4)
+                    pp.pprint(hit['_source'])
                print("-" * 50)
    except Exception as e:
        print(f"查询出错: {e}")
--- a/dsRag/ElasticSearch/T5_SelectByTags.py
+++ b/dsRag/ElasticSearch/T5_SelectByTags.py
@@ -21,13 +21,19 @@ query_tag = ["MATH_DATA_1", "小学数学"]  # 可以修改为其他需要的标
 query = {
    "query": {
        "bool": {
-            "must": [
+            "should": [
                {
-                    "terms": {
-                        "tags.tags": query_tag
+                    "match": {
+                        "tags": "MATH_DATA_1"
+                    }
+                },
+                {
+                    "match": {
+                        "tags": "小学数学"
                    }
                }
-            ]
+            ],
+            "minimum_should_match": 1
        }
    },
    "size": 1000
@@ -45,9 +51,15 @@ try:
        for i, hit in enumerate(hits, 1):
            print(f"{i}. ID: {hit['_id']}")
            print(f"   内容: {hit['_source'].get('user_input', '')}")
-            print(f"   标签: {hit['_source'].get('tags', '')}")
+            print(f"   标签: {hit['_source']['tags']['tags']}")
            print("-" * 50)
 except Exception as e:
+    print(f"查询执行失败，错误详情: {str(e)}")
+    print(f"查询条件: {query}")
+    if hasattr(e, 'info') and isinstance(e.info, dict):
+        print(f"Elasticsearch错误详情: {e.info}")
+    if hasattr(e, 'status_code'):
+        print(f"HTTP状态码: {e.status_code}")
    print(f"查询出错: {str(e)}")

 # 4. 执行查询
--- a/dsRag/T4运行结果.txt
+++ b/dsRag/T4运行结果.txt
--- a/dsRag/日志.txt
+++ b/dsRag/日志.txt