main
HuangHai 4 weeks ago
parent 91ca6e382b
commit 3b0354059f

@ -0,0 +1,76 @@
from elasticsearch import Elasticsearch
import jieba
import numpy as np
class EsSearchUtil:
    """Thin helper around an Elasticsearch client offering text, vector and hybrid search.

    All queries are issued against the index named by ``es_config['index_name']``.
    """

    # Length of the vector produced by ``text_to_embedding``.
    # NOTE(review): presumably this must equal the ``dims`` of the index's
    # ``embedding`` dense_vector field -- confirm against the index mapping.
    EMBEDDING_DIM = 200

    def __init__(self, es_config: dict):
        """Create the Elasticsearch connection.

        :param es_config: configuration dict; ``hosts``, ``username`` and
            ``password`` are read here, ``index_name`` is read by the search
            methods.
        """
        self.es_config = es_config
        # NOTE(review): verify_certs=False turns off TLS certificate
        # verification; acceptable for a dev cluster with self-signed
        # certificates, but should be reconsidered for production.
        self.es_conn = Elasticsearch(
            hosts=es_config['hosts'],
            basic_auth=(es_config['username'], es_config['password']),
            verify_certs=False,
        )

    def text_to_embedding(self, text) -> list:
        """Return an embedding vector for ``text`` as a list of floats.

        NOTE(review): this is a placeholder -- the vector is drawn uniformly
        at random from [0, 1) and does not depend on ``text``, so two calls
        with the same text give different vectors and vector-search ranking
        is effectively arbitrary. Replace with a real embedding model.
        No tokenization is done here because the placeholder ignores the text.

        :param text: text to embed (currently unused by the placeholder)
        :return: list of ``EMBEDDING_DIM`` floats
        """
        return np.random.rand(self.EMBEDDING_DIM).tolist()

    def vector_search(self, query, size=10):
        """Rank every document by cosine similarity to the query embedding.

        :param query: search query text
        :param size: number of hits to return
        :return: the response returned by the Elasticsearch client's ``search``
        """
        query_embedding = self.text_to_embedding(query)
        script_query = {
            "script_score": {
                # Score all documents; the script alone decides the ranking.
                "query": {"match_all": {}},
                "script": {
                    # Negative similarities are clamped to 0 (script_score
                    # scores must not be negative).
                    "source": "double score = cosineSimilarity(params.query_vector, 'embedding'); return score >= 0 ? score : 0",
                    "params": {"query_vector": query_embedding},
                },
            }
        }
        return self.es_conn.search(
            index=self.es_config['index_name'],
            query=script_query,
            size=size,
        )

    def text_search(self, query, size=10):
        """Run a ``match`` query for ``query`` on the ``user_input`` field.

        :param query: search query text
        :param size: number of hits to return
        :return: the response returned by the Elasticsearch client's ``search``
        """
        return self.es_conn.search(
            index=self.es_config['index_name'],
            query={"match": {"user_input": query}},
            size=size,
        )

    def hybrid_search(self, query, size=10):
        """Run both the vector search and the text search for ``query``.

        The two result sets are returned side by side; they are not merged
        or re-ranked.

        :param query: search query text
        :param size: number of hits to return from each search
        :return: dict with keys ``'vector_results'`` and ``'text_results'``
        """
        vector_results = self.vector_search(query, size)
        text_results = self.text_search(query, size)
        return {
            'vector_results': vector_results,
            'text_results': text_results,
        }

    def search(self, query, search_type='hybrid', size=10):
        """Unified search entry point.

        :param query: search query text
        :param search_type: ``'vector'``, ``'text'`` or ``'hybrid'``; any
            other value is treated as ``'hybrid'``
        :param size: number of hits to return
        :return: the result of the selected search method
        """
        if search_type == 'vector':
            return self.vector_search(query, size)
        if search_type == 'text':
            return self.text_search(query, size)
        # Fallback kept for backward compatibility: unrecognised types
        # run the hybrid search rather than raising.
        return self.hybrid_search(query, size)
Loading…
Cancel
Save