diff --git a/AI/WxMini/Milvus/Config/MulvusConfig.py b/AI/WxMini/Milvus/Config/MulvusConfig.py index ef42f924..d5443b7d 100644 --- a/AI/WxMini/Milvus/Config/MulvusConfig.py +++ b/AI/WxMini/Milvus/Config/MulvusConfig.py @@ -1,12 +1,23 @@ +# Milvus 服务器的主机地址 MS_HOST = "10.10.14.101" + +# Milvus 服务器的端口号 MS_PORT = "19530" + +# Milvus 集合的名称 MS_COLLECTION_NAME = "ds_collection" + +# Milvus 连接池的最大连接数 MS_MAX_CONNECTIONS = 5 -# "dimension": 128 -# "index_file_size": 1024 -# "metric_type": "L2" -# "nlist": 1024 -# "nprobe": 16 -# "topk": 10 -# "batch_size": 100 -# "index_type": "IVF_FLAT" + +# 腾讯 AI Lab 中文词向量模型的路径 +MS_MODEL_PATH = "D:/Tencent_AILab_ChineseEmbedding/Tencent_AILab_ChineseEmbedding.txt" + +# 加载词向量模型时限制的词汇数量 +MS_MODEL_LIMIT = 10000 + +# 词向量的维度(腾讯 AI Lab 中文词向量模型的维度为 200) +MS_DIMENSION = 200 + +# Milvus 搜索时的 nprobe 参数,用于控制搜索的精度和性能 +MS_NPROBE = 100 \ No newline at end of file diff --git a/AI/WxMini/Milvus/Config/__pycache__/MulvusConfig.cpython-310.pyc b/AI/WxMini/Milvus/Config/__pycache__/MulvusConfig.cpython-310.pyc index 65b8dae5..40071efd 100644 Binary files a/AI/WxMini/Milvus/Config/__pycache__/MulvusConfig.cpython-310.pyc and b/AI/WxMini/Milvus/Config/__pycache__/MulvusConfig.cpython-310.pyc differ diff --git a/AI/WxMini/Milvus/T1_create_collection.py b/AI/WxMini/Milvus/T1_create_collection.py index 2921fa96..1231995e 100644 --- a/AI/WxMini/Milvus/T1_create_collection.py +++ b/AI/WxMini/Milvus/T1_create_collection.py @@ -24,7 +24,7 @@ if utility.has_collection(collection_name): fields = [ FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True), # 主键字段,自动生成 ID FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=500), # 存储对话文本 - FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=200) # 向量字段,维度为 200 + FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=MS_DIMENSION) # 向量字段,维度为 200 ] schema_description = "Simple demo collection" diff --git a/AI/WxMini/Milvus/T3_insert_data.py b/AI/WxMini/Milvus/T3_insert_data.py index 7a5f7a77..fc431dcf 100644 --- a/AI/WxMini/Milvus/T3_insert_data.py +++ b/AI/WxMini/Milvus/T3_insert_data.py @@ -5,10 +5,11 @@ from gensim.models import KeyedVectors import jieba # 加载预训练的 Word2Vec 模型 -model_path = "D:/Tencent_AILab_ChineseEmbedding/Tencent_AILab_ChineseEmbedding.txt" # 替换为你的 Word2Vec 模型路径 -model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=10000) +model_path = MS_MODEL_PATH # 替换为你的 Word2Vec 模型路径 +model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=MS_MODEL_LIMIT) print(f"模型加载成功,词向量维度: {model.vector_size}") + # 将文本转换为嵌入向量 def text_to_embedding(text): words = jieba.lcut(text) # 使用 jieba 分词 @@ -23,6 +24,7 @@ def text_to_embedding(text): print("未找到有效词,返回零向量") return [0.0] * model.vector_size + # 1. 使用连接池管理 Milvus 连接 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS) @@ -55,4 +57,4 @@ print("数据插入成功。") milvus_pool.release_connection(connection) # 7. 关闭连接池 -milvus_pool.close() \ No newline at end of file +milvus_pool.close() diff --git a/AI/WxMini/Milvus/T5_search_near_word.py b/AI/WxMini/Milvus/T5_search_near_word.py index d25057ae..64c82593 100644 --- a/AI/WxMini/Milvus/T5_search_near_word.py +++ b/AI/WxMini/Milvus/T5_search_near_word.py @@ -1,4 +1,3 @@ - import time import jieba # 导入 jieba 分词库 from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager @@ -7,10 +6,11 @@ from WxMini.Milvus.Config.MulvusConfig import * from gensim.models import KeyedVectors # 加载预训练的 Word2Vec 模型 -model_path = "D:/Tencent_AILab_ChineseEmbedding/Tencent_AILab_ChineseEmbedding.txt" # 替换为你的 Word2Vec 模型路径 -model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=10000) +model_path = MS_MODEL_PATH # 替换为你的 Word2Vec 模型路径 +model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=MS_MODEL_LIMIT) print(f"模型加载成功,词向量维度: {model.vector_size}") + # 将文本转换为嵌入向量 def text_to_embedding(text): words = jieba.lcut(text) # 使用 jieba 分词 @@ -25,6 +25,7 @@ def text_to_embedding(text): print("未找到有效词,返回零向量") return [0.0] * model.vector_size + # 1. 使用连接池管理 Milvus 连接 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS) @@ -46,8 +47,8 @@ current_embedding = text_to_embedding(input_text) # 7. 查询与当前对话最相关的历史对话 search_params = { - "metric_type": "L2", # 使用 L2 距离度量方式 - "params": {"nprobe": 100} # 设置 IVF_FLAT 的 nprobe 参数 + "metric_type": "L2", # 使用 L2 距离度量方式 + "params": {"nprobe": MS_NPROBE} # 设置 IVF_FLAT 的 nprobe 参数 } start_time = time.time() results = collection_manager.search(current_embedding, search_params, limit=2) # 返回 2 条结果 @@ -73,4 +74,4 @@ print(f"查询耗时: {end_time - start_time:.4f} 秒") milvus_pool.release_connection(connection) # 11. 关闭连接池 -milvus_pool.close() \ No newline at end of file +milvus_pool.close()