|
|
|
@ -1,6 +1,9 @@
|
|
|
|
|
from elasticsearch import Elasticsearch
|
|
|
|
|
import jieba
|
|
|
|
|
import numpy as np
|
|
|
|
|
from elasticsearch import Elasticsearch
|
|
|
|
|
from gensim.models import KeyedVectors
|
|
|
|
|
|
|
|
|
|
from Config.Config import MS_MODEL_PATH, MS_MODEL_LIMIT
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class EsSearchUtil:
|
|
|
|
|
def __init__(self, es_config):
|
|
|
|
@ -16,9 +19,18 @@ class EsSearchUtil:
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def text_to_embedding(self, text):
    """Convert ``text`` into an embedding by averaging its word vectors.

    The text is segmented with ``jieba.lcut``; every token found in the
    word2vec vocabulary contributes its vector, and the element-wise mean
    of those vectors is returned.

    Args:
        text: The string to embed.

    Returns:
        A list of Python floats of length ``model.vector_size``. When no
        token of ``text`` is in the vocabulary (including empty text), a
        list of zeros of that length is returned.
    """
    # Load the pre-trained model lazily and keep it on the instance:
    # parsing a text-format word2vec file on every call would dominate
    # the cost of each query.
    model = getattr(self, "_embedding_model", None)
    if model is None:
        model = KeyedVectors.load_word2vec_format(
            MS_MODEL_PATH, binary=False, limit=MS_MODEL_LIMIT
        )
        self._embedding_model = model

    # Segment the text and keep only tokens that have a vector.
    vectors = [model[word] for word in jieba.lcut(text) if word in model]

    if not vectors:
        return [0.0] * model.vector_size

    # Element-wise mean; ``tolist()`` yields plain Python floats rather
    # than NumPy scalars.
    return np.mean(vectors, axis=0).tolist()
|
|
|
|
|
|
|
|
|
|
def vector_search(self, query, size=10):
|
|
|
|
|
query_embedding = self.text_to_embedding(query)
|
|
|
|
|