diff --git a/dsRag/ElasticSearch/T6_XiangLiangQuery.py b/dsRag/ElasticSearch/T6_XiangLiangQuery.py index 91a9b3d0..215ab7ad 100644 --- a/dsRag/ElasticSearch/T6_XiangLiangQuery.py +++ b/dsRag/ElasticSearch/T6_XiangLiangQuery.py @@ -16,7 +16,8 @@ warnings.filterwarnings('ignore', message='Unverified HTTPS request is being mad if __name__ == "__main__": # 测试查询 - query = "小学数学中有哪些模型" + # query = "小学数学中有哪些模型" + query = "文言虚词" query_tags = ["MATH_1"] # 默认搜索标签,可修改 print(f"\n=== 开始执行查询 ===") print(f"原始查询文本: {query}") @@ -89,11 +90,14 @@ if __name__ == "__main__": # 打印详细结果 print("\n=== 最终搜索结果 ===") - print(f" 向量搜索结果: {len(vector_results['hits']['hits'])}条") + + vector_int = 0 for i, hit in enumerate(vector_results['hits']['hits'], 1): - print(f" {i}. 文档ID: {hit['_id']}, 相似度分数: {hit['_score']:.2f}") - print(f" 内容: {hit['_source']['user_input']}") - # print(f" 详细: {hit['_source']['tags']['full_content']}") + if hit['_score'] > 0.4: # 阀值0.4 + print(f" {i}. 文档ID: {hit['_id']}, 相似度分数: {hit['_score']:.2f}") + print(f" 内容: {hit['_source']['user_input']}") + vector_int = vector_int + 1 + print(f" 向量搜索结果: {vector_int}条") print("\n文本精确搜索结果:") for i, hit in enumerate(text_results['hits']['hits']): diff --git a/dsRag/Util/EsSearchUtil.py b/dsRag/Util/EsSearchUtil.py index 09d1c569..36dea7c1 100644 --- a/dsRag/Util/EsSearchUtil.py +++ b/dsRag/Util/EsSearchUtil.py @@ -172,7 +172,19 @@ class EsSearchUtil: "size": 3 } ) - logger.info(f"5. 向量搜索结果数量: {len(vector_results['hits']['hits'])}") + # 处理一下,判断是否到达阀值 + filtered_vector_hits = [] + vector_int = 0 + for hit in vector_results['hits']['hits']: + if hit['_score'] > 0.4: # 阀值0.4 + logger.info(f" {vector_int + 1}. 文档ID: {hit['_id']}, 相似度分数: {hit['_score']:.2f}") + logger.info(f" 内容: {hit['_source']['user_input']}") + filtered_vector_hits.append(hit) + vector_int += 1 + + # 更新vector_results只包含通过过滤的文档 + vector_results['hits']['hits'] = filtered_vector_hits + logger.info(f"5. 向量搜索结果数量(过滤后): {vector_int}") # 文本精确搜索 logger.info("\n=== 文本精确搜索阶段 ===")