from elasticsearch import Elasticsearch
import jieba
import numpy as np


class EsSearchUtil:
    """Thin wrapper around an Elasticsearch client offering three search modes.

    Supports vector search (script-scored cosine similarity against the
    ``embedding`` field), full-text search (``match`` on the ``user_input``
    field), and a hybrid mode that runs both and returns the two raw
    responses side by side.
    """

    # Length of the vectors produced by ``text_to_embedding``.
    EMBEDDING_DIM = 200

    def __init__(self, es_config):
        """Create the Elasticsearch connection.

        :param es_config: configuration dict. Required keys: ``hosts``,
            ``username``, ``password`` and ``index_name``. Optional key
            ``verify_certs`` (default ``False``) controls TLS certificate
            verification.
        :raises KeyError: if ``hosts``, ``username`` or ``password`` is missing.
        """
        self.es_config = es_config
        self.es_conn = Elasticsearch(
            hosts=es_config['hosts'],
            basic_auth=(es_config['username'], es_config['password']),
            # SECURITY: verification stays off by default so existing
            # deployments keep working; set es_config['verify_certs'] = True
            # wherever the cluster certificate can be validated.
            verify_certs=es_config.get('verify_certs', False),
        )

    def text_to_embedding(self, text):
        """Return an embedding vector for ``text``.

        NOTE: this is a placeholder. The result is a random vector of
        ``EMBEDDING_DIM`` floats in [0, 1) that does not depend on ``text``,
        so rankings produced by ``vector_search`` are not meaningful yet.
        TODO: replace with a real embedding model.

        :param text: text to embed (currently unused).
        :return: list of ``EMBEDDING_DIM`` floats.
        """
        return np.random.rand(self.EMBEDDING_DIM).tolist()

    def vector_search(self, query, size=10):
        """Rank every document by cosine similarity to the query embedding.

        :param query: query text, converted with ``text_to_embedding``.
        :param size: maximum number of hits to return.
        :return: the response of the Elasticsearch ``search`` call.
        """
        query_embedding = self.text_to_embedding(query)
        script_query = {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    # Cosine similarity can be negative; the script clamps it
                    # to 0 because script_score must not yield negative scores.
                    "source": "double score = cosineSimilarity(params.query_vector, 'embedding'); return score >= 0 ? score : 0",
                    "params": {"query_vector": query_embedding},
                },
            }
        }

        return self.es_conn.search(
            index=self.es_config['index_name'],
            query=script_query,
            size=size,
        )

    def text_search(self, query, size=10):
        """Run a full-text ``match`` query against the ``user_input`` field.

        :param query: query text.
        :param size: maximum number of hits to return.
        :return: the response of the Elasticsearch ``search`` call.
        """
        return self.es_conn.search(
            index=self.es_config['index_name'],
            query={"match": {"user_input": query}},
            size=size,
        )

    def hybrid_search(self, query, size=10):
        """Run both the vector search and the text search for ``query``.

        The two result sets are returned separately; no merging or
        re-ranking is performed.

        :param query: query text.
        :param size: maximum number of hits requested from each search.
        :return: dict with keys ``vector_results`` and ``text_results``.
        """
        vector_results = self.vector_search(query, size)
        text_results = self.text_search(query, size)

        return {
            'vector_results': vector_results,
            'text_results': text_results,
        }

    def search(self, query, search_type='hybrid', size=10):
        """Unified entry point that dispatches on ``search_type``.

        :param query: query text.
        :param search_type: ``'vector'``, ``'text'`` or ``'hybrid'``. Any
            other value is treated as ``'hybrid'``.
        :param size: maximum number of hits to return.
        :return: a single search response for ``'vector'`` / ``'text'``,
            otherwise the dict produced by ``hybrid_search``.
        """
        if search_type == 'vector':
            return self.vector_search(query, size)
        if search_type == 'text':
            return self.text_search(query, size)
        # Deliberate fallback: unrecognised types behave like 'hybrid'.
        return self.hybrid_search(query, size)