main
HuangHai 3 weeks ago
parent 398ceb887b
commit 43b3d4f602

@ -16,7 +16,8 @@ warnings.filterwarnings('ignore', message='Unverified HTTPS request is being mad
if __name__ == "__main__": if __name__ == "__main__":
# 测试查询 # 测试查询
query = "小学数学中有哪些模型" # query = "小学数学中有哪些模型"
query = "文言虚词"
query_tags = ["MATH_1"] # 默认搜索标签,可修改 query_tags = ["MATH_1"] # 默认搜索标签,可修改
print(f"\n=== 开始执行查询 ===") print(f"\n=== 开始执行查询 ===")
print(f"原始查询文本: {query}") print(f"原始查询文本: {query}")
@ -89,11 +90,14 @@ if __name__ == "__main__":
# 打印详细结果 # 打印详细结果
print("\n=== 最终搜索结果 ===") print("\n=== 最终搜索结果 ===")
print(f" 向量搜索结果: {len(vector_results['hits']['hits'])}")
vector_int = 0
for i, hit in enumerate(vector_results['hits']['hits'], 1): for i, hit in enumerate(vector_results['hits']['hits'], 1):
print(f" {i}. 文档ID: {hit['_id']}, 相似度分数: {hit['_score']:.2f}") if hit['_score'] > 0.4: # 阀值0.4
print(f" 内容: {hit['_source']['user_input']}") print(f" {i}. 文档ID: {hit['_id']}, 相似度分数: {hit['_score']:.2f}")
# print(f" 详细: {hit['_source']['tags']['full_content']}") print(f" 内容: {hit['_source']['user_input']}")
vector_int = vector_int + 1
print(f" 向量搜索结果: {vector_int}")
print("\n文本精确搜索结果:") print("\n文本精确搜索结果:")
for i, hit in enumerate(text_results['hits']['hits']): for i, hit in enumerate(text_results['hits']['hits']):

@ -172,7 +172,19 @@ class EsSearchUtil:
"size": 3 "size": 3
} }
) )
logger.info(f"5. 向量搜索结果数量: {len(vector_results['hits']['hits'])}") # 处理一下,判断是否到达阀值
filtered_vector_hits = []
vector_int = 0
for hit in vector_results['hits']['hits']:
if hit['_score'] > 0.4: # 阀值0.4
logger.info(f" {vector_int + 1}. 文档ID: {hit['_id']}, 相似度分数: {hit['_score']:.2f}")
logger.info(f" 内容: {hit['_source']['user_input']}")
filtered_vector_hits.append(hit)
vector_int += 1
# 更新vector_results只包含通过过滤的文档
vector_results['hits']['hits'] = filtered_vector_hits
logger.info(f"5. 向量搜索结果数量(过滤后): {vector_int}")
# 文本精确搜索 # 文本精确搜索
logger.info("\n=== 文本精确搜索阶段 ===") logger.info("\n=== 文本精确搜索阶段 ===")

Loading…
Cancel
Save