From ff4b4ff61b1e1a4ef8a0eb21466871a6b9d6b64f Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Mon, 24 Mar 2025 13:32:29 +0800 Subject: [PATCH] 'commit' --- AI/WxMini/Milvus/T5_search_near_data.py | 15 ++++++---- AI/WxMini/Milvus/X3_insert_data.py | 39 +++++++++++++++++++++---- 2 files changed, 44 insertions(+), 10 deletions(-) diff --git a/AI/WxMini/Milvus/T5_search_near_data.py b/AI/WxMini/Milvus/T5_search_near_data.py index ac32201c..f5edcd8b 100644 --- a/AI/WxMini/Milvus/T5_search_near_data.py +++ b/AI/WxMini/Milvus/T5_search_near_data.py @@ -1,4 +1,5 @@ import time +import jieba # 导入 jieba 分词库 from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager from WxMini.Milvus.Utils.MilvusConnectionPool import * from WxMini.Milvus.Config.MulvusConfig import * @@ -12,11 +13,14 @@ print(f"模型加载成功,词向量维度: {model.vector_size}") # 将文本转换为嵌入向量 def text_to_embedding(text): - # 直接使用全句进行向量计算 - if text in model: - embedding = model[text] - print(f"生成的全句向量: {embedding[:5]}...") # 打印前 5 维 - return embedding + words = jieba.lcut(text) # 使用 jieba 分词 + print(f"文本: {text}, 分词结果: {words}") + embeddings = [model[word] for word in words if word in model] # 获取有效词向量 + print(f"有效词向量数量: {len(embeddings)}") + if embeddings: + avg_embedding = sum(embeddings) / len(embeddings) # 计算平均向量 + print(f"生成的平均向量: {avg_embedding[:5]}...") # 打印前 5 维 + return avg_embedding else: print("未找到有效词,返回零向量") return [0.0] * model.vector_size @@ -40,6 +44,7 @@ input_text = input("请输入一句话:") # 例如:“我今天心情不太 # 7. 将文本转换为嵌入向量 current_embedding = text_to_embedding(input_text) +print(f"当前文本的向量: {current_embedding[:5]}...") # 打印前 5 维 # 8. 查询与当前对话最相关的历史对话 search_params = { diff --git a/AI/WxMini/Milvus/X3_insert_data.py b/AI/WxMini/Milvus/X3_insert_data.py index d6665500..9b6c499c 100644 --- a/AI/WxMini/Milvus/X3_insert_data.py +++ b/AI/WxMini/Milvus/X3_insert_data.py @@ -1,9 +1,18 @@ from WxMini.Milvus.Config.MulvusConfig import * from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager from WxMini.Milvus.Utils.MilvusConnectionPool import * +from WxMini.TtsConfig import * from gensim.models import KeyedVectors + import jieba import time +from openai import OpenAI + +# 初始化 OpenAI 客户端 +client = OpenAI( + api_key=MODEL_API_KEY, + base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", +) # 1. 加载预训练的 Word2Vec 模型 model_path = MS_MODEL_PATH @@ -36,14 +45,34 @@ collection_manager = MilvusCollectionManager(collection_name) # 5. 输入一个用户问题 user_input = input("请输入一句话:") # 例如:“我今天心情不太好” -model_response = "我没听懂,能再说一遍吗?" # 大模型的固定回复 + +# 6. 调用大模型生成回复 +try: + response = client.chat.completions.create( + model=MODEL_NAME, + messages=[ + {"role": "system", "content": "你是一个非常好的聊天伙伴,可以疏导用户,帮他解压,一句控制在20字以内。"}, + {"role": "user", "content": user_input} + ], + max_tokens=500 + ) + if response.choices and response.choices[0].message.content: + model_response = response.choices[0].message.content.strip() + else: + model_response = "大模型未返回有效结果" +except Exception as e: + model_response = f"调用大模型失败: {str(e)}" + +print(f"大模型回复: {model_response}") + +# 7. 获取当前时间和会话 ID timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) # 当前时间 session_id = "session_001" # 会话 ID(可以根据需要动态生成) -# 6. 将用户问题转换为嵌入向量 +# 8. 将用户问题转换为嵌入向量 user_embedding = text_to_embedding(user_input) -# 7. 插入数据,确保字段顺序与集合定义一致 +# 9. 插入数据,确保字段顺序与集合定义一致 entities = [ [session_id], # session_id [user_input], # user_input @@ -54,8 +83,8 @@ entities = [ collection_manager.insert_data(entities) print("数据插入成功。") -# 8. 释放连接 +# 10. 释放连接 milvus_pool.release_connection(connection) -# 9. 关闭连接池 +# 11. 关闭连接池 milvus_pool.close() \ No newline at end of file