'commit'

4 months ago · 17066c1b27
parent 92d50e11df
commit 17066c1b27
4 changed files with 24 additions and 23 deletions
--- a/AI/WxMini/Milvus/T2_create_index.py
+++ b/AI/WxMini/Milvus/T2_create_index.py
@@ -1,4 +1,3 @@
-# create_index.py
 from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
 from WxMini.Milvus.Utils.MilvusConnectionPool import *
 from WxMini.Milvus.Config.MulvusConfig import *
--- a/AI/WxMini/Milvus/T3_insert_data.py
+++ b/AI/WxMini/Milvus/T3_insert_data.py
@@ -4,13 +4,13 @@ from WxMini.Milvus.Utils.MilvusConnectionPool import *
 from gensim.models import KeyedVectors
 import jieba

-# 加载预训练的 Word2Vec 模型
+# 1. 加载预训练的 Word2Vec 模型
 model_path = MS_MODEL_PATH
 model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=MS_MODEL_LIMIT)
 print(f"模型加载成功，词向量维度: {model.vector_size}")


-# 将文本转换为嵌入向量
+# 功能：将文本转换为嵌入向量
 def text_to_embedding(text):
    words = jieba.lcut(text)  # 使用 jieba 分词
    print(f"文本: {text}, 分词结果: {words}")
@@ -25,17 +25,17 @@ def text_to_embedding(text):
        return [0.0] * model.vector_size


-# 1. 使用连接池管理 Milvus 连接
+# 2. 使用连接池管理 Milvus 连接
 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)

-# 2. 从连接池中获取一个连接
+# 3. 从连接池中获取一个连接
 connection = milvus_pool.get_connection()

-# 3. 初始化集合管理器
+# 4. 初始化集合管理器
 collection_name = MS_COLLECTION_NAME
 collection_manager = MilvusCollectionManager(collection_name)

-# 4. 插入数据
+# 5. 插入数据
 texts = [
    "我今天心情不太好，因为工作压力很大。",  # 第一个对话文本
    "我最近在学习 Python，感觉很有趣。",  # 第二个对话文本
@@ -44,16 +44,16 @@ texts = [
 ]
 embeddings = [text_to_embedding(text) for text in texts]  # 使用文本模型生成向量

-# 打印生成的向量值
+# 6. 打印生成的向量值
 for text, embedding in zip(texts, embeddings):
    print(f"文本: {text}, 向量: {embedding[:5]}...")  # 打印前 5 维

-# 5. 插入数据，确保字段顺序与集合定义一致
+# 7. 插入数据，确保字段顺序与集合定义一致
 entities = [texts, embeddings]  # 第一个列表是 text 字段，第二个列表是 embedding 字段
 collection_manager.insert_data(entities)

-# 6. 释放连接
+# 8. 释放连接
 milvus_pool.release_connection(connection)

-# 7. 关闭连接池
+# 9. 关闭连接池
 milvus_pool.close()
--- a/AI/WxMini/Milvus/T5_search_near_data.py
+++ b/AI/WxMini/Milvus/T5_search_near_data.py
@@ -5,7 +5,7 @@ from WxMini.Milvus.Utils.MilvusConnectionPool import *
 from WxMini.Milvus.Config.MulvusConfig import *
 from gensim.models import KeyedVectors

-# 加载预训练的 Word2Vec 模型
+# 1. 加载预训练的 Word2Vec 模型
 model_path = MS_MODEL_PATH  # 替换为你的 Word2Vec 模型路径
 model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=MS_MODEL_LIMIT)
 print(f"模型加载成功，词向量维度: {model.vector_size}")
@@ -26,26 +26,26 @@ def text_to_embedding(text):
        return [0.0] * model.vector_size


-# 1. 使用连接池管理 Milvus 连接
+# 2. 使用连接池管理 Milvus 连接
 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)

-# 2. 从连接池中获取一个连接
+# 3. 从连接池中获取一个连接
 connection = milvus_pool.get_connection()

-# 3. 初始化集合管理器
+# 4. 初始化集合管理器
 collection_name = MS_COLLECTION_NAME
 collection_manager = MilvusCollectionManager(collection_name)

-# 4. 加载集合到内存
+# 5. 加载集合到内存
 collection_manager.load_collection()

-# 5. 输入一句话
+# 6. 输入一句话
 input_text = input("请输入一句话：")  # 例如：“我今天心情不太好”

-# 6. 将文本转换为嵌入向量
+# 7. 将文本转换为嵌入向量
 current_embedding = text_to_embedding(input_text)

-# 7. 查询与当前对话最相关的历史对话
+# 8. 查询与当前对话最相关的历史对话
 search_params = {
    "metric_type": "L2",  # 使用 L2 距离度量方式
    "params": {"nprobe": MS_NPROBE}  # 设置 IVF_FLAT 的 nprobe 参数
@@ -54,7 +54,7 @@ start_time = time.time()
 results = collection_manager.search(current_embedding, search_params, limit=2)  # 返回 2 条结果
 end_time = time.time()

-# 8. 输出查询结果
+# 9. 输出查询结果
 print("最相关的历史对话:")
 if results:
    for hits in results:
@@ -67,11 +67,11 @@ if results:
 else:
    print("未找到相关历史对话，请检查查询参数或数据。")

-# 9. 输出查询耗时
+# 10. 输出查询耗时
 print(f"查询耗时: {end_time - start_time:.4f} 秒")

-# 10. 释放连接
+# 11. 释放连接
 milvus_pool.release_connection(connection)

-# 11. 关闭连接池
+# 12. 关闭连接池
 milvus_pool.close()
--- a/AI/WxMini/安装.txt
+++ b/AI/WxMini/安装.txt
@@ -1,5 +1,7 @@
 # 腾讯 AI Lab 的中文词向量
+https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/Tencent_AILab_ChineseEmbedding/Tencent_AILab_ChineseEmbedding.tar.gz

+# 显示已安装包的版本号
 pip show gensim
 pip show jieba
 pip show pymilvus