"""Manual smoke test comparing vector search and phrase search in Elasticsearch.

Reads sentences from a text file, picks a few at random, and prints the hits
returned by a cosine-similarity query against the ``knowledge_base`` index and
by a ``match_phrase`` query against the ``raw_texts`` index.
"""
import random
from typing import List

from elasticsearch import Elasticsearch

from Util.EmbeddingUtil import text_to_embedding
import Config.Config as config

# Initialize the Elasticsearch connection from the project configuration.
es = Elasticsearch(
    hosts=config.ES_CONFIG['hosts'],
    basic_auth=config.ES_CONFIG['basic_auth'],
    verify_certs=config.ES_CONFIG['verify_certs']
)


def vector_search(text: str, size: int = 5) -> List[str]:
    """Run a vector-similarity search for *text*.

    The text is embedded with ``text_to_embedding`` and every document in the
    ``knowledge_base`` index is scored by cosine similarity between that
    embedding and the document's ``vector`` field.

    Args:
        text: Query text to embed and search for.
        size: Maximum number of hits to return (defaults to 5).

    Returns:
        The ``text`` field of each hit, in the order Elasticsearch returned
        them.
    """
    # NOTE(review): presumably a list of floats whose length matches the
    # dimension of the index's 'vector' field -- confirm in EmbeddingUtil.
    vector = text_to_embedding(text)

    script_query = {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                # Cosine similarity lies in [-1, 1]; script_score rejects
                # negative scores, so shift the range to [0, 2].
                "source": "cosineSimilarity(params.query_vector, 'vector') + 1.0",
                "params": {"query_vector": vector}
            }
        }
    }

    response = es.search(
        index='knowledge_base',
        body={
            "size": size,
            "query": script_query,
            # Only the 'text' field is needed from each document.
            "_source": ["text"]
        }
    )
    return [hit['_source']['text'] for hit in response['hits']['hits']]


def text_search(text: str) -> List[str]:
    """Run a phrase search for *text*.

    Issues a ``match_phrase`` query on the ``text`` field of the ``raw_texts``
    index. No ``size`` is sent, so the server-side default number of hits
    applies.

    Args:
        text: Phrase to search for.

    Returns:
        The ``text`` field of each hit, in the order Elasticsearch returned
        them.
    """
    response = es.search(
        index='raw_texts',
        body={
            "query": {
                "match_phrase": {
                    "text": text
                }
            }
        }
    )
    return [hit['_source']['text'] for hit in response['hits']['hits']]


def test_queries(file_path: str, sample_size: int = 5) -> None:
    """Print search results for randomly chosen sentences from a text file.

    Each non-blank line of the file is treated as one sentence. Up to
    ``sample_size`` sentences are drawn at random (fewer if the file has fewer
    non-blank lines), and for each one the vector-search and phrase-search
    results are printed.

    Args:
        file_path: Path to a UTF-8 text file with one sentence per line.
        sample_size: Number of sentences to sample (defaults to 5).
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        # Strip once per line and drop lines that are blank after stripping.
        stripped_lines = (line.strip() for line in f)
        sentences = [sentence for sentence in stripped_lines if sentence]

    # Cap at the number of available sentences; random.sample raises
    # ValueError when asked for more items than the population holds.
    test_samples = random.sample(sentences, min(sample_size, len(sentences)))

    for sample in test_samples:
        print(f"测试句子: {sample}")

        print("向量搜索结果:")
        for result in vector_search(sample):
            print(f"- {result}")

        print("\n文本精确搜索结果:")
        for result in text_search(sample):
            print(f"- {result}")

        print("="*50)


if __name__ == "__main__":
    test_queries("../Txt/人口变化趋势对云南教育的影响.txt")