'commit'

4 months ago · d6434aa5be
parent 67c2ae73b8
commit d6434aa5be
5 changed files with 32 additions and 18 deletions
--- a/AI/WxMini/Milvus/Config/MulvusConfig.py
+++ b/AI/WxMini/Milvus/Config/MulvusConfig.py
@ -1,12 +1,23 @@
+# Milvus 服务器的主机地址
 MS_HOST = "10.10.14.101"
+
+# Milvus 服务器的端口号
 MS_PORT = "19530"
+
+# Milvus 集合的名称
 MS_COLLECTION_NAME = "ds_collection"
+
+# Milvus 连接池的最大连接数
 MS_MAX_CONNECTIONS = 5
-# "dimension": 128
-# "index_file_size": 1024
-# "metric_type": "L2"
-# "nlist": 1024
-# "nprobe": 16
-# "topk": 10
-# "batch_size": 100
-# "index_type": "IVF_FLAT"
+
+# 腾讯 AI Lab 中文词向量模型的路径
+MS_MODEL_PATH = "D:/Tencent_AILab_ChineseEmbedding/Tencent_AILab_ChineseEmbedding.txt"
+
+# 加载词向量模型时限制的词汇数量
+MS_MODEL_LIMIT = 10000
+
+# 词向量的维度（腾讯 AI Lab 中文词向量模型的维度为 200）
+MS_DIMENSION = 200
+
+# Milvus 搜索时的 nprobe 参数，用于控制搜索的精度和性能
+MS_NPROBE = 100
--- a/AI/WxMini/Milvus/Config/pycache/MulvusConfig.cpython-310.pyc
+++ b/AI/WxMini/Milvus/Config/pycache/MulvusConfig.cpython-310.pyc
--- a/AI/WxMini/Milvus/T1_create_collection.py
+++ b/AI/WxMini/Milvus/T1_create_collection.py
@ -24,7 +24,7 @@ if utility.has_collection(collection_name):
 fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),  # 主键字段，自动生成 ID
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=500),  # 存储对话文本
-    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=200)  # 向量字段，维度为 200
+    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=MS_DIMENSION)  # Vector field; dimension comes from MS_DIMENSION (currently 200 in MulvusConfig.py)
 ]
 schema_description = "Simple demo collection"

--- a/AI/WxMini/Milvus/T3_insert_data.py
+++ b/AI/WxMini/Milvus/T3_insert_data.py
@ -5,10 +5,11 @@ from gensim.models import KeyedVectors
 import jieba

 # 加载预训练的 Word2Vec 模型
-model_path = "D:/Tencent_AILab_ChineseEmbedding/Tencent_AILab_ChineseEmbedding.txt"  # 替换为你的 Word2Vec 模型路径
-model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=10000)
+model_path = MS_MODEL_PATH  # Word2Vec model path; change it via MS_MODEL_PATH in Config/MulvusConfig.py
+model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=MS_MODEL_LIMIT)
 print(f"模型加载成功，词向量维度: {model.vector_size}")

+
 # 将文本转换为嵌入向量
 def text_to_embedding(text):
    words = jieba.lcut(text)  # 使用 jieba 分词
@ -23,6 +24,7 @@ def text_to_embedding(text):
        print("未找到有效词，返回零向量")
        return [0.0] * model.vector_size

+
 # 1. 使用连接池管理 Milvus 连接
 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)

@ -55,4 +57,4 @@ print("数据插入成功。")
 milvus_pool.release_connection(connection)

 # 7. 关闭连接池
-milvus_pool.close()
+milvus_pool.close()
--- a/AI/WxMini/Milvus/T5_search_near_word.py
+++ b/AI/WxMini/Milvus/T5_search_near_word.py
@ -1,4 +1,3 @@
-
 import time
 import jieba  # 导入 jieba 分词库
 from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
@ -7,10 +6,11 @@ from WxMini.Milvus.Config.MulvusConfig import *
 from gensim.models import KeyedVectors

 # 加载预训练的 Word2Vec 模型
-model_path = "D:/Tencent_AILab_ChineseEmbedding/Tencent_AILab_ChineseEmbedding.txt"  # 替换为你的 Word2Vec 模型路径
-model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=10000)
+model_path = MS_MODEL_PATH  # Word2Vec model path; change it via MS_MODEL_PATH in Config/MulvusConfig.py
+model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=MS_MODEL_LIMIT)
 print(f"模型加载成功，词向量维度: {model.vector_size}")

+
 # 将文本转换为嵌入向量
 def text_to_embedding(text):
    words = jieba.lcut(text)  # 使用 jieba 分词
@ -25,6 +25,7 @@ def text_to_embedding(text):
        print("未找到有效词，返回零向量")
        return [0.0] * model.vector_size

+
 # 1. 使用连接池管理 Milvus 连接
 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)

@ -46,8 +47,8 @@ current_embedding = text_to_embedding(input_text)

 # 7. 查询与当前对话最相关的历史对话
 search_params = {
-    "metric_type": "L2",       # 使用 L2 距离度量方式
-    "params": {"nprobe": 100}  # 设置 IVF_FLAT 的 nprobe 参数
+    "metric_type": "L2",  # 使用 L2 距离度量方式
+    "params": {"nprobe": MS_NPROBE}  # 设置 IVF_FLAT 的 nprobe 参数
 }
 start_time = time.time()
 results = collection_manager.search(current_embedding, search_params, limit=2)  # 返回 2 条结果
@ -73,4 +74,4 @@ print(f"查询耗时: {end_time - start_time:.4f} 秒")
 milvus_pool.release_connection(connection)

 # 11. 关闭连接池
-milvus_pool.close()
+milvus_pool.close()