From 43b3d4f602493ce140e500d8e148bb0ce968934c Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Mon, 30 Jun 2025 08:53:41 +0800 Subject: [PATCH] 'commit' --- dsRag/ElasticSearch/T6_XiangLiangQuery.py | 14 +++++++++----- dsRag/Util/EsSearchUtil.py | 14 +++++++++++++- .../__pycache__/EsSearchUtil.cpython-310.pyc | Bin 5791 -> 6091 bytes 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/dsRag/ElasticSearch/T6_XiangLiangQuery.py b/dsRag/ElasticSearch/T6_XiangLiangQuery.py index 91a9b3d0..215ab7ad 100644 --- a/dsRag/ElasticSearch/T6_XiangLiangQuery.py +++ b/dsRag/ElasticSearch/T6_XiangLiangQuery.py @@ -16,7 +16,8 @@ warnings.filterwarnings('ignore', message='Unverified HTTPS request is being mad if __name__ == "__main__": # 测试查询 - query = "小学数学中有哪些模型" + # query = "小学数学中有哪些模型" + query = "文言虚词" query_tags = ["MATH_1"] # 默认搜索标签,可修改 print(f"\n=== 开始执行查询 ===") print(f"原始查询文本: {query}") @@ -89,11 +90,14 @@ if __name__ == "__main__": # 打印详细结果 print("\n=== 最终搜索结果 ===") - print(f" 向量搜索结果: {len(vector_results['hits']['hits'])}条") + + vector_int = 0 for i, hit in enumerate(vector_results['hits']['hits'], 1): - print(f" {i}. 文档ID: {hit['_id']}, 相似度分数: {hit['_score']:.2f}") - print(f" 内容: {hit['_source']['user_input']}") - # print(f" 详细: {hit['_source']['tags']['full_content']}") + if hit['_score'] > 0.4: # 阀值0.4 + print(f" {i}. 文档ID: {hit['_id']}, 相似度分数: {hit['_score']:.2f}") + print(f" 内容: {hit['_source']['user_input']}") + vector_int = vector_int + 1 + print(f" 向量搜索结果: {vector_int}条") print("\n文本精确搜索结果:") for i, hit in enumerate(text_results['hits']['hits']): diff --git a/dsRag/Util/EsSearchUtil.py b/dsRag/Util/EsSearchUtil.py index 09d1c569..36dea7c1 100644 --- a/dsRag/Util/EsSearchUtil.py +++ b/dsRag/Util/EsSearchUtil.py @@ -172,7 +172,19 @@ class EsSearchUtil: "size": 3 } ) - logger.info(f"5. 向量搜索结果数量: {len(vector_results['hits']['hits'])}") + # 处理一下,判断是否到达阀值 + filtered_vector_hits = [] + vector_int = 0 + for hit in vector_results['hits']['hits']: + if hit['_score'] > 0.4: # 阀值0.4 + logger.info(f" {vector_int + 1}. 文档ID: {hit['_id']}, 相似度分数: {hit['_score']:.2f}") + logger.info(f" 内容: {hit['_source']['user_input']}") + filtered_vector_hits.append(hit) + vector_int += 1 + + # 更新vector_results只包含通过过滤的文档 + vector_results['hits']['hits'] = filtered_vector_hits + logger.info(f"5. 向量搜索结果数量(过滤后): {vector_int}") # 文本精确搜索 logger.info("\n=== 文本精确搜索阶段 ===") diff --git a/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc b/dsRag/Util/__pycache__/EsSearchUtil.cpython-310.pyc index b8660cd06a341531f537f6061a2ff2d6bcf4a41c..7cdb86aa46b61156470b441e97c2372e0320a070 100644 GIT binary patch delta 809 zcmYjO%}*0i5P#EM+wHR57Fr7xmF2r2EeQ#jpvD-vcu~~E#3tG_eG3#w3)?LR@-}FS z;K?+3(WnG%qv2>UUPOr|9`)>@y?C+J_)qBU0*#Zr$;@wl^Z8!JzKyCmpHC(z)gP%j zz4xv9-8UFO*Tyd6%bPAjOkzS2EP)C0uCh=emRN$-D3{bvYqP}i&~{5=ZrTwc_M0Mi z#w;)M(g^w|7DQu*xymB#WD@nalPy9|V(e7JwoV_iy0*k^cLjhrObyX)%tSkkh|JlOMs)Aam7VlR`L33kiAX32iX66ZZ8P#H@rbbt=h9xnO2*JP(@ z)O0iXXoVsi)uBUlc%A&eToLFQ2WzsYNDrdBX$1udajo%Wvr&0(~xxZik zuv34y(RjQ&sjUj}v6+I3K}7*zqbPxF`JdAC{Qb#{d8T delta 510 zcmX@DKVO$GpO=@50SH9yBxbx(*vNO4SCtpYESj$|)oW-<& zc_BkBOV!KCTzvbSYM2(Vq==WW)v%>VNHTzMGvjQAxl9Wg85zKGl5mz3n8lLCUc#8d z1yswEBAp_WB0IT-U$LGYtX2+a4m+kfDe@@_Eer@7T!40@D1yb{szGAn47D8T47Hpk z9L)^1Ts0gn46!1$+%?=OvXTrnTxE>4b?d+xwqQnDczgqUXCO zKbtq_+0+d{5i5mTEE$<4#glu5(wT}JCw~-*p+0Ex*7!Xj+vK)j&IpM*2inTu0PZZRjOq}*alEGS6LONkOHO3g`4EKZG2 z&d