main
HuangHai 3 weeks ago
commit 2a274f8968

@ -16,7 +16,8 @@ warnings.filterwarnings('ignore', message='Unverified HTTPS request is being mad
if __name__ == "__main__":
# 测试查询
query = "小学数学中有哪些模型"
# query = "小学数学中有哪些模型"
query = "文言虚词"
query_tags = ["MATH_1"] # 默认搜索标签,可修改
print(f"\n=== 开始执行查询 ===")
print(f"原始查询文本: {query}")
@ -89,11 +90,14 @@ if __name__ == "__main__":
# 打印详细结果
print("\n=== 最终搜索结果 ===")
print(f" 向量搜索结果: {len(vector_results['hits']['hits'])}")
vector_int = 0
for i, hit in enumerate(vector_results['hits']['hits'], 1):
print(f" {i}. 文档ID: {hit['_id']}, 相似度分数: {hit['_score']:.2f}")
print(f" 内容: {hit['_source']['user_input']}")
# print(f" 详细: {hit['_source']['tags']['full_content']}")
if hit['_score'] > 0.4: # 阈值0.4
print(f" {i}. 文档ID: {hit['_id']}, 相似度分数: {hit['_score']:.2f}")
print(f" 内容: {hit['_source']['user_input']}")
vector_int = vector_int + 1
print(f" 向量搜索结果: {vector_int}")
print("\n文本精确搜索结果:")
for i, hit in enumerate(text_results['hits']['hits']):

@ -172,7 +172,19 @@ class EsSearchUtil:
"size": 3
}
)
logger.info(f"5. 向量搜索结果数量: {len(vector_results['hits']['hits'])}")
# 处理一下,判断是否到达阈值
filtered_vector_hits = []
vector_int = 0
for hit in vector_results['hits']['hits']:
if hit['_score'] > 0.4: # 阈值0.4
logger.info(f" {vector_int + 1}. 文档ID: {hit['_id']}, 相似度分数: {hit['_score']:.2f}")
logger.info(f" 内容: {hit['_source']['user_input']}")
filtered_vector_hits.append(hit)
vector_int += 1
# 更新vector_results只包含通过过滤的文档
vector_results['hits']['hits'] = filtered_vector_hits
logger.info(f"5. 向量搜索结果数量(过滤后): {vector_int}")
# 文本精确搜索
logger.info("\n=== 文本精确搜索阶段 ===")

Loading…
Cancel
Save