import hashlib

from elasticsearch import Elasticsearch
import jieba
import numpy as np


class EsSearchUtil:
    """Thin helper around an Elasticsearch client for vector, text and hybrid search."""

    # Length of the query vector produced by ``text_to_embedding``.
    # NOTE(review): presumably this must equal the dimension of the index's
    # 'embedding' field -- confirm against the index mapping.
    EMBEDDING_DIM = 200

    def __init__(self, es_config):
        """Create the Elasticsearch connection.

        :param es_config: configuration dict. Must contain ``hosts``,
            ``username`` and ``password``; ``index_name`` is read later by the
            search methods. An optional ``verify_certs`` key controls TLS
            certificate verification and defaults to ``False``, which is the
            value this class previously hard-coded.
        """
        self.es_config = es_config
        self.es_conn = Elasticsearch(
            hosts=es_config['hosts'],
            basic_auth=(es_config['username'], es_config['password']),
            # Disabling verification exposes the connection to
            # man-in-the-middle attacks; set 'verify_certs': True in the
            # config wherever the cluster presents a trusted certificate.
            verify_certs=es_config.get('verify_certs', False),
        )

    def text_to_embedding(self, text):
        """Turn ``text`` into a deterministic placeholder embedding.

        The text is tokenised with jieba. Each token is mapped to a
        pseudo-random vector seeded from the token's SHA-256 digest, and the
        token vectors are averaged. The same text therefore always yields the
        same vector, and texts sharing tokens yield closer vectors.

        This is still a stand-in, not a trained embedding model.
        NOTE(review): it is only meaningful if the documents' 'embedding'
        field was produced the same way -- replace with the real model used at
        indexing time.

        :param text: query text.
        :return: list of ``EMBEDDING_DIM`` floats, each in [0, 1).
        """
        tokens = [tok for tok in jieba.lcut(text) if tok.strip()]
        if not tokens:
            # Empty or whitespace-only input: still return a well-defined,
            # non-zero vector so the cosine similarity script has a valid
            # operand.
            tokens = [text]

        total = np.zeros(self.EMBEDDING_DIM)
        for tok in tokens:
            # hashlib rather than the builtin hash(): the latter is salted per
            # process and would make vectors differ between runs.
            digest = hashlib.sha256(tok.encode("utf-8")).digest()
            seed = int.from_bytes(digest[:8], "big")
            total += np.random.default_rng(seed).random(self.EMBEDDING_DIM)
        return (total / len(tokens)).tolist()

    def vector_search(self, query, size=10):
        """Rank all documents by cosine similarity to the query embedding.

        Uses a ``script_score`` query over ``match_all``; the script compares
        the query vector with the document field ``embedding`` and clamps
        negative similarities to 0.

        :param query: query text.
        :param size: maximum number of hits to request.
        :return: the response of the client's ``search`` call.
        """
        query_embedding = self.text_to_embedding(query)
        script_query = {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    "source": "double score = cosineSimilarity(params.query_vector, 'embedding'); return score >= 0 ? score : 0",
                    "params": {"query_vector": query_embedding}
                }
            }
        }
        return self.es_conn.search(
            index=self.es_config['index_name'],
            query=script_query,
            size=size
        )

    def text_search(self, query, size=10):
        """Run a full-text ``match`` query against the ``user_input`` field.

        :param query: query text.
        :param size: maximum number of hits to request.
        :return: the response of the client's ``search`` call.
        """
        return self.es_conn.search(
            index=self.es_config['index_name'],
            query={"match": {"user_input": query}},
            size=size
        )

    def hybrid_search(self, query, size=10):
        """Run both the vector search and the text search for ``query``.

        The two result sets are returned side by side; they are not merged or
        re-ranked here.

        :param query: query text.
        :param size: maximum number of hits requested from each search.
        :return: dict with keys ``vector_results`` and ``text_results``.
        """
        vector_results = self.vector_search(query, size)
        text_results = self.text_search(query, size)
        return {
            'vector_results': vector_results,
            'text_results': text_results
        }

    def search(self, query, search_type='hybrid', size=10):
        """Unified search entry point.

        :param query: query text.
        :param search_type: ``'vector'`` or ``'text'``; any other value
            (including the default ``'hybrid'``) runs the hybrid search.
        :param size: maximum number of hits to request.
        :return: the result of the selected search method.
        """
        if search_type == 'vector':
            return self.vector_search(query, size)
        if search_type == 'text':
            return self.text_search(query, size)
        # Unrecognised values deliberately fall back to hybrid, as before.
        return self.hybrid_search(query, size)