You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

89 lines
3.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

from Milvus.Config.MulvusConfig import *
from Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
from Milvus.Utils.MilvusConnectionPool import *
from gensim.models import KeyedVectors
import jieba
import time
from openai import OpenAI
# Build the OpenAI-compatible chat client. Requests are sent to the DashScope
# compatible-mode endpoint and authenticated with the configured API key.
client = OpenAI(
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    api_key=MODEL_API_KEY,
)

# 1. Load the pre-trained Word2Vec vectors from the configured path.
#    binary=False selects the text format; MS_MODEL_LIMIT is forwarded as the
#    loader's `limit` argument.
model_path = MS_MODEL_PATH
model = KeyedVectors.load_word2vec_format(
    model_path,
    binary=False,
    limit=MS_MODEL_LIMIT,
)
print(f"模型加载成功,词向量维度: {model.vector_size}")
# Convert a piece of text into a single embedding vector.
def text_to_embedding(text):
    """Return the averaged Word2Vec vector of the in-vocabulary tokens of ``text``.

    The text is segmented with jieba. Tokens that are not present in the
    module-level ``model`` are skipped, and the vectors of the remaining
    tokens are averaged element-wise.

    Args:
        text: The string to embed.

    Returns:
        A vector of length ``model.vector_size``. When no token of ``text`` is
        in the vocabulary (which includes empty input), a zero vector is
        returned. Both paths return a NumPy array so callers always receive
        the same type.
    """
    # Local import keeps the file's top-level import block untouched.
    import numpy as np

    words = jieba.lcut(text)  # segment the text with jieba
    print(f"文本: {text}, 分词结果: {words}")

    # Keep only the tokens the model actually has a vector for.
    embeddings = [model[word] for word in words if word in model]
    print(f"有效词向量数量: {len(embeddings)}")

    if not embeddings:
        print("未找到有效词,返回零向量")
        # Previously this branch returned a plain Python list while the
        # success path returned an array; return an array here as well.
        # NOTE(review): float32 assumes the vectors were loaded with gensim's
        # default datatype -- confirm if a custom datatype is ever used.
        return np.zeros(model.vector_size, dtype=np.float32)

    avg_embedding = sum(embeddings) / len(embeddings)
    print(f"生成的平均向量: {avg_embedding[:5]}...")  # show the first 5 dimensions
    return avg_embedding
# Script flow: ask the chat model one question, embed the question, and store
# the question/answer pair in the Milvus collection. The pool connection is
# released and the pool closed even when a step in between raises.

# 2. Manage Milvus connections through a connection pool
milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)
try:
    # 3. Borrow a connection from the pool
    connection = milvus_pool.get_connection()
    try:
        # 4. Initialise the collection manager
        collection_name = MS_COLLECTION_NAME
        collection_manager = MilvusCollectionManager(collection_name)

        # 5. The user question to send
        user_input = "我给你起的名字叫小云,记住你的名字,你是我的私人助理。"

        # 6. Ask the chat model for a reply
        try:
            response = client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": "你是一个非常好的聊天伙伴,可以疏导用户,帮他解压,一句控制在20字以内。"},
                    {"role": "user", "content": user_input},
                ],
                max_tokens=500,
            )
            if response.choices and response.choices[0].message.content:
                model_response = response.choices[0].message.content.strip()
            else:
                model_response = "大模型未返回有效结果"
        except Exception as e:
            # Best-effort by design: a failed model call is recorded as the
            # reply text instead of aborting the script.
            model_response = f"调用大模型失败: {e}"
        print(f"大模型回复: {model_response}")

        # 7. Current local time and the session ID
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        person_id = "F9D1C319-215D-3B4E-FAA0-0E024AC12D3C"  # session ID; could be generated dynamically

        # 8. Convert the user question into an embedding vector
        user_embedding = text_to_embedding(user_input)

        # 9. Insert the row; the column order must match the collection definition
        entities = [
            [person_id],       # person_id
            [user_input],      # user_input
            [model_response],  # model_response
            [timestamp],       # timestamp
            [user_embedding],  # embedding
        ]
        collection_manager.insert_data(entities)
        print("数据插入成功。")
    finally:
        # 10. Always hand the connection back, even if a step above failed
        milvus_pool.release_connection(connection)
finally:
    # 11. Always shut the pool down
    milvus_pool.close()