main
HuangHai 4 weeks ago
parent c8cd71f7c8
commit dc312b0852

@ -1,6 +1,9 @@
from elasticsearch import Elasticsearch
import jieba
import numpy as np
from elasticsearch import Elasticsearch
from gensim.models import KeyedVectors
from Config.Config import MS_MODEL_PATH, MS_MODEL_LIMIT
class EsSearchUtil:
def __init__(self, es_config):
@ -16,9 +19,18 @@ class EsSearchUtil:
)
def text_to_embedding(self, text):
    """Convert text into an embedding by averaging its word vectors.

    The text is segmented with jieba; every token that is present in the
    pretrained word2vec vocabulary contributes its vector, and the
    element-wise mean of those vectors is returned.

    Args:
        text: The string to embed.

    Returns:
        A list of floats of length ``model.vector_size``. The list is all
        zeros when no token of ``text`` is in the model vocabulary.
    """
    # Loading the word2vec file is expensive, so do it once per instance
    # and reuse it on later calls instead of reloading for every query.
    model = getattr(self, "_embedding_model", None)
    if model is None:
        model = KeyedVectors.load_word2vec_format(
            MS_MODEL_PATH, binary=False, limit=MS_MODEL_LIMIT
        )
        self._embedding_model = model

    # Segment the text and keep only the tokens the model knows.
    vectors = [model[word] for word in jieba.lcut(text) if word in model]
    if not vectors:
        return [0.0] * model.vector_size

    # Element-wise mean; tolist() converts to plain Python floats.
    return np.mean(vectors, axis=0).tolist()
def vector_search(self, query, size=10):
query_embedding = self.text_to_embedding(query)

Loading…
Cancel
Save