You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

76 lines
2.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from elasticsearch import Elasticsearch
import jieba
import numpy as np
class EsSearchUtil:
    """Run vector, full-text and hybrid searches against one Elasticsearch index."""

    # Length of the placeholder query vector built by ``text_to_embedding``.
    # NOTE(review): presumably this has to equal the dimension of the index's
    # 'embedding' field used in ``vector_search`` -- confirm against the mapping.
    EMBEDDING_DIM = 200

    def __init__(self, es_config):
        """
        Create the Elasticsearch client from the given configuration.

        :param es_config: configuration mapping. This class reads the keys
            ``hosts`` and ``basic_auth`` when connecting, and ``index_name``
            when searching.
        """
        self.es_config = es_config
        # NOTE(review): certificate verification is disabled for every
        # connection made by this client; confirm this is intended outside
        # of development environments.
        self.es_conn = Elasticsearch(
            hosts=es_config['hosts'],
            basic_auth=es_config['basic_auth'],
            verify_certs=False
        )

    def text_to_embedding(self, text):
        """
        Turn ``text`` into a query vector.

        The current implementation is a placeholder: it ignores ``text`` and
        returns a fresh random vector on every call, so it can later be
        swapped for a real embedding model.

        :param text: text to embed (currently unused)
        :return: list of ``EMBEDDING_DIM`` floats drawn uniformly from [0, 1)
        """
        return np.random.rand(self.EMBEDDING_DIM).tolist()

    def vector_search(self, query, size=10):
        """
        Score every document by cosine similarity to the query embedding.

        :param query: search query text
        :param size: number of hits to request
        :return: the response returned by ``self.es_conn.search``
        """
        query_embedding = self.text_to_embedding(query)
        script_query = {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    # The script clamps negative similarities to 0 before
                    # returning the score.
                    "source": "double score = cosineSimilarity(params.query_vector, 'embedding'); return score >= 0 ? score : 0",
                    "params": {"query_vector": query_embedding}
                }
            }
        }
        return self.es_conn.search(
            index=self.es_config['index_name'],
            query=script_query,
            size=size
        )

    def text_search(self, query, size=10):
        """
        Run a ``match`` query for ``query`` on the ``user_input`` field.

        :param query: search query text
        :param size: number of hits to request
        :return: the response returned by ``self.es_conn.search``
        """
        return self.es_conn.search(
            index=self.es_config['index_name'],
            query={"match": {"user_input": query}},
            size=size
        )

    def hybrid_search(self, query, size=10):
        """
        Run both the vector search and the text search for the same query.

        The two result sets are returned side by side; they are not merged
        or re-ranked here.

        :param query: search query text
        :param size: number of hits to request from each search
        :return: dict with the keys ``vector_results`` and ``text_results``
        """
        vector_results = self.vector_search(query, size)
        text_results = self.text_search(query, size)
        return {
            'vector_results': vector_results,
            'text_results': text_results
        }

    def search(self, query, search_type='hybrid', size=10):
        """
        Unified search entry point.

        :param query: search query text
        :param search_type: ``'vector'``, ``'text'`` or ``'hybrid'``; any
            other value is treated as ``'hybrid'``
        :param size: number of hits to request
        :return: result of the selected search method
        """
        if search_type == 'vector':
            return self.vector_search(query, size)
        if search_type == 'text':
            return self.text_search(query, size)
        # Everything else, including unrecognized values, falls back to hybrid.
        return self.hybrid_search(query, size)