'commit'

4 weeks ago · 8decf74959
parent 7ff869e4a5
commit 8decf74959
6 changed files with 0 additions and 249 deletions
--- a/dsRag/Milvus/Test/T1_create_collection.py
+++ b/dsRag/Milvus/Test/T1_create_collection.py
@@ -1,40 +0,0 @@
-from pymilvus import FieldSchema, DataType, utility
-
-from WxMini.Milvus.Config.MulvusConfig import *
-from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
-from WxMini.Milvus.Utils.MilvusConnectionPool import *
-
-# 1. 使用连接池管理 Milvus 连接
-milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)
-
-# 2. 从连接池中获取一个连接
-connection = milvus_pool.get_connection()
-
-# 3. 初始化集合管理器
-collection_name = MS_COLLECTION_NAME
-collection_manager = MilvusCollectionManager(collection_name)
-
-# 4. 判断集合是否存在，存在则删除
-if utility.has_collection(collection_name):
-    print(f"集合 '{collection_name}' 已存在，正在删除...")
-    utility.drop_collection(collection_name)
-    print(f"集合 '{collection_name}' 已删除。")
-
-# 5. 定义集合的字段和模式
-fields = [
-    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),  # 主键字段，自动生成 ID
-    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=500),  # 存储对话文本
-    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=MS_DIMENSION)  # 向量字段，维度为 200
-]
-schema_description = "Simple demo collection"
-
-# 6. 创建集合
-print(f"正在创建集合 '{collection_name}'...")
-collection_manager.create_collection(fields, schema_description)
-print(f"集合 '{collection_name}' 创建成功。")
-
-# 7. 释放连接
-milvus_pool.release_connection(connection)
-
-# 8. 关闭连接池
-milvus_pool.close()
--- a/dsRag/Milvus/Test/T2_create_index.py
+++ b/dsRag/Milvus/Test/T2_create_index.py
@@ -1,27 +0,0 @@
-from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
-from WxMini.Milvus.Utils.MilvusConnectionPool import *
-from WxMini.Milvus.Config.MulvusConfig import *
-
-# 1. 使用连接池管理 Milvus 连接
-milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)
-
-# 2. 从连接池中获取一个连接
-connection = milvus_pool.get_connection()
-
-# 3. 初始化集合管理器
-collection_name = MS_COLLECTION_NAME
-collection_manager = MilvusCollectionManager(collection_name)
-
-# 4. 创建索引
-index_params = {
-    "index_type": "IVF_FLAT",  # 使用 IVF_FLAT 索引类型
-    "metric_type": "L2",       # 使用 L2 距离度量方式
-    "params": {"nlist": 128}   # 设置 IVF_FLAT 的 nlist 参数
-}
-collection_manager.create_index("embedding", index_params)
-
-# 5. 释放连接
-milvus_pool.release_connection(connection)
-
-# 6. 关闭连接池
-milvus_pool.close()
--- a/dsRag/Milvus/Test/T3_insert_data.py
+++ b/dsRag/Milvus/Test/T3_insert_data.py
@@ -1,59 +0,0 @@
-from WxMini.Milvus.Config.MulvusConfig import *
-from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
-from WxMini.Milvus.Utils.MilvusConnectionPool import *
-from gensim.models import KeyedVectors
-import jieba
-
-# 1. 加载预训练的 Word2Vec 模型
-model_path = MS_MODEL_PATH
-model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=MS_MODEL_LIMIT)
-print(f"模型加载成功，词向量维度: {model.vector_size}")
-
-
-# 功能：将文本转换为嵌入向量
-def text_to_embedding(text):
-    words = jieba.lcut(text)  # 使用 jieba 分词
-    print(f"文本: {text}, 分词结果: {words}")
-    embeddings = [model[word] for word in words if word in model]
-    print(f"有效词向量数量: {len(embeddings)}")
-    if embeddings:
-        avg_embedding = sum(embeddings) / len(embeddings)
-        print(f"生成的平均向量: {avg_embedding[:5]}...")  # 打印前 5 维
-        return avg_embedding
-    else:
-        print("未找到有效词，返回零向量")
-        return [0.0] * model.vector_size
-
-
-# 2. 使用连接池管理 Milvus 连接
-milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)
-
-# 3. 从连接池中获取一个连接
-connection = milvus_pool.get_connection()
-
-# 4. 初始化集合管理器
-collection_name = MS_COLLECTION_NAME
-collection_manager = MilvusCollectionManager(collection_name)
-
-# 5. 插入数据
-texts = [
-    "我今天心情不太好，因为工作压力很大。",  # 第一个对话文本
-    "我最近在学习 Python，感觉很有趣。",  # 第二个对话文本
-    "我打算周末去爬山，放松一下。",  # 第三个对话文本
-    "吉林省广告产业园是东师理想的办公地点。"  # 第四个对话文本
-]
-embeddings = [text_to_embedding(text) for text in texts]  # 使用文本模型生成向量
-
-# 6. 打印生成的向量值
-for text, embedding in zip(texts, embeddings):
-    print(f"文本: {text}, 向量: {embedding[:5]}...")  # 打印前 5 维
-
-# 7. 插入数据，确保字段顺序与集合定义一致
-entities = [texts, embeddings]  # 第一个列表是 text 字段，第二个列表是 embedding 字段
-collection_manager.insert_data(entities)
-
-# 8. 释放连接
-milvus_pool.release_connection(connection)
-
-# 9. 关闭连接池
-milvus_pool.close()
--- a/dsRag/Milvus/Test/T4_select_all_data.py
+++ b/dsRag/Milvus/Test/T4_select_all_data.py
@@ -1,45 +0,0 @@
-from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
-from WxMini.Milvus.Utils.MilvusConnectionPool import *
-from WxMini.Milvus.Config.MulvusConfig import *
-
-# 1. 使用连接池管理 Milvus 连接
-milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)
-
-# 2. 从连接池中获取一个连接
-connection = milvus_pool.get_connection()
-
-# 3. 初始化集合管理器
-collection_name = MS_COLLECTION_NAME
-collection_manager = MilvusCollectionManager(collection_name)
-
-# 4. 加载集合到内存
-collection_manager.load_collection()
-print(f"集合 '{collection_name}' 已加载到内存。")
-
-# 5. 查询所有数据
-try:
-    # 使用 Milvus 的 query 方法查询所有数据
-    results = collection_manager.collection.query(
-        expr="",  # 空表达式表示查询所有数据
-        output_fields=["id", "text", "embedding"],  # 指定返回的字段
-        limit=1000  # 设置最大返回记录数
-    )
-    print("查询结果：")
-    if results:
-        for result in results:
-            try:
-                text = result["text"]  # 获取 text 字段
-                embedding = result["embedding"]  # 获取 embedding 字段
-                print(f"ID: {result['id']}, Text: {text}, Embedding: {embedding[:5]}...")  # 只打印前 5 维向量
-            except Exception as e:
-                print(f"查询失败: {e}")
-    else:
-        print("未找到相关数据，请检查查询参数或数据。")
-except Exception as e:
-    print(f"查询失败: {e}")
-
-# 6. 释放连接
-milvus_pool.release_connection(connection)
-
-# 7. 关闭连接池
-milvus_pool.close()
--- a/dsRag/Milvus/Test/T5_search_near_data.py
+++ b/dsRag/Milvus/Test/T5_search_near_data.py
@@ -1,78 +0,0 @@
-import time
-import jieba  # 导入 jieba 分词库
-from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
-from WxMini.Milvus.Utils.MilvusConnectionPool import *
-from WxMini.Milvus.Config.MulvusConfig import *
-from gensim.models import KeyedVectors
-
-# 1. 加载预训练的 Word2Vec 模型
-model_path = MS_MODEL_PATH  # 替换为你的 Word2Vec 模型路径
-model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=MS_MODEL_LIMIT)
-print(f"模型加载成功，词向量维度: {model.vector_size}")
-
-
-# 将文本转换为嵌入向量
-def text_to_embedding(text):
-    words = jieba.lcut(text)  # 使用 jieba 分词
-    print(f"文本: {text}, 分词结果: {words}")
-    embeddings = [model[word] for word in words if word in model]  # 获取有效词向量
-    print(f"有效词向量数量: {len(embeddings)}")
-    if embeddings:
-        avg_embedding = sum(embeddings) / len(embeddings)  # 计算平均向量
-        print(f"生成的平均向量: {avg_embedding[:5]}...")  # 打印前 5 维
-        return avg_embedding
-    else:
-        print("未找到有效词，返回零向量")
-        return [0.0] * model.vector_size
-
-
-# 2. 使用连接池管理 Milvus 连接
-milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)
-
-# 3. 从连接池中获取一个连接
-connection = milvus_pool.get_connection()
-
-# 4. 初始化集合管理器
-collection_name = MS_COLLECTION_NAME
-collection_manager = MilvusCollectionManager(collection_name)
-
-# 5. 加载集合到内存
-collection_manager.load_collection()
-
-# 6. 输入一句话
-input_text = input("请输入一句话：")  # 例如：“我今天心情不太好”
-
-# 7. 将文本转换为嵌入向量
-current_embedding = text_to_embedding(input_text)
-print(f"当前文本的向量: {current_embedding[:5]}...")  # 打印前 5 维
-
-# 8. 查询与当前对话最相关的历史对话
-search_params = {
-    "metric_type": "L2",  # 使用 L2 距离度量方式
-    "params": {"nprobe": MS_NPROBE}  # 设置 IVF_FLAT 的 nprobe 参数
-}
-start_time = time.time()
-results = collection_manager.search(current_embedding, search_params, limit=2)  # 返回 2 条结果
-end_time = time.time()
-
-# 9. 输出查询结果
-print("最相关的历史对话:")
-if results:
-    for hits in results:
-        for hit in hits:
-            try:
-                text = collection_manager.query_text_by_id(hit.id)
-                print(f"- {text} (距离: {hit.distance})")
-            except Exception as e:
-                print(f"查询失败: {e}")
-else:
-    print("未找到相关历史对话，请检查查询参数或数据。")
-
-# 10. 输出查询耗时
-print(f"查询耗时: {end_time - start_time:.4f} 秒")
-
-# 11. 释放连接
-milvus_pool.release_connection(connection)
-
-# 12. 关闭连接池
-milvus_pool.close()
--- a/dsRag/Milvus/Test/__init__.py
+++ b/dsRag/Milvus/Test/__init__.py