diff --git a/AI/WxMini/Milvus/T1_create_collection.py b/AI/WxMini/Milvus/T1_create_collection.py index 99918cb5..fa2e53b5 100644 --- a/AI/WxMini/Milvus/T1_create_collection.py +++ b/AI/WxMini/Milvus/T1_create_collection.py @@ -3,6 +3,11 @@ from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager from WxMini.Milvus.Utils.MilvusConnectionPool import * from WxMini.Milvus.Config.MulvusConfig import * +from pymilvus import FieldSchema, DataType, utility +from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager +from WxMini.Milvus.Utils.MilvusConnectionPool import * +from WxMini.Milvus.Config.MulvusConfig import * + # 1. 使用连接池管理 Milvus 连接 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS) @@ -22,6 +27,7 @@ if utility.has_collection(collection_name): # 5. 定义集合的字段和模式 fields = [ FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True), # 主键字段,自动生成 ID + FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=500), # 存储对话文本 FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128) # 向量字段,维度为 128 ] schema_description = "Simple demo collection" diff --git a/AI/WxMini/Milvus/T3_insert_data.py b/AI/WxMini/Milvus/T3_insert_data.py index 0ba3ed9c..a45c688a 100644 --- a/AI/WxMini/Milvus/T3_insert_data.py +++ b/AI/WxMini/Milvus/T3_insert_data.py @@ -4,6 +4,12 @@ from WxMini.Milvus.Config.MulvusConfig import * from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager from WxMini.Milvus.Utils.MilvusConnectionPool import * +import random + +from WxMini.Milvus.Config.MulvusConfig import * +from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager +from WxMini.Milvus.Utils.MilvusConnectionPool import * + # 1. 使用连接池管理 Milvus 连接 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS) @@ -15,12 +21,19 @@ collection_name = MS_COLLECTION_NAME collection_manager = MilvusCollectionManager(collection_name) # 4. 插入数据 -data = [ +texts = [ + "我今天心情不太好,因为工作压力很大。", # 第一个对话文本 + "我最近在学习 Python,感觉很有趣。", # 第二个对话文本 + "我打算周末去爬山,放松一下。" # 第三个对话文本 +] +embeddings = [ [random.random() for _ in range(128)], # 第一个 128 维向量 [random.random() for _ in range(128)], # 第二个 128 维向量 [random.random() for _ in range(128)] # 第三个 128 维向量 ] -entities = [data] # 插入的数据 + +# 插入数据,确保字段顺序与集合定义一致 +entities = [texts, embeddings] # 第一个列表是 text 字段,第二个列表是 embedding 字段 collection_manager.insert_data(entities) print("数据插入成功。") @@ -35,9 +48,13 @@ search_params = { } results = collection_manager.search(query_vector, search_params, limit=2) print("查询结果:") -for hits in results: - for hit in hits: - print(f"ID: {hit.id}, Distance: {hit.distance}") +if results: + for hits in results: + for hit in hits: + text = collection_manager.query_text_by_id(hit.id) + print(f"ID: {hit.id}, Text: {text}, Distance: {hit.distance}") +else: + print("未找到相关数据,请检查查询参数或数据。") # 7. 释放连接 milvus_pool.release_connection(connection) diff --git a/AI/WxMini/Milvus/T4_search_data.py b/AI/WxMini/Milvus/T4_search_data.py index 23fd744e..3f84f22a 100644 --- a/AI/WxMini/Milvus/T4_search_data.py +++ b/AI/WxMini/Milvus/T4_search_data.py @@ -26,9 +26,13 @@ search_params = { } results = collection_manager.search(query_vector, search_params, limit=20) print("查询结果:") -for hits in results: - for hit in hits: - print(f"ID: {hit.id}, Distance: {hit.distance}") +if results: + for hits in results: + for hit in hits: + text = collection_manager.query_text_by_id(hit.id) # 获取 text 字段 + print(f"ID: {hit.id}, Text: {text}, Distance: {hit.distance}") +else: + print("未找到相关数据,请检查查询参数或数据。") # 6. 释放连接 milvus_pool.release_connection(connection) diff --git a/AI/WxMini/Milvus/T5_search_near_word.py b/AI/WxMini/Milvus/T5_search_near_word.py new file mode 100644 index 00000000..f7e03aa5 --- /dev/null +++ b/AI/WxMini/Milvus/T5_search_near_word.py @@ -0,0 +1,52 @@ +import numpy as np +import time +from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager +from WxMini.Milvus.Utils.MilvusConnectionPool import * +from WxMini.Milvus.Config.MulvusConfig import * +# 1. 使用连接池管理 Milvus 连接 +milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS) + +# 2. 从连接池中获取一个连接 +connection = milvus_pool.get_connection() + +# 3. 初始化集合管理器 +collection_name = MS_COLLECTION_NAME +collection_manager = MilvusCollectionManager(collection_name) + +# 4. 加载集合到内存 +collection_manager.load_collection() + +# 5. 模拟当前对话的嵌入向量 +current_embedding = np.random.random(128).tolist() # 随机生成一个 128 维向量 + +# 6. 查询与当前对话最相关的历史对话 +search_params = { + "metric_type": "L2", # 使用 L2 距离度量方式 + "params": {"nprobe": 100} # 设置 IVF_FLAT 的 nprobe 参数 +} +start_time = time.time() +results = collection_manager.search(current_embedding, search_params, limit=2) +end_time = time.time() + +# 7. 输出查询结果 +print("当前对话的嵌入向量:", current_embedding) +print("最相关的历史对话:") +if results: + for hits in results: + for hit in hits: + try: + text = collection_manager.query_text_by_id(hit.id) + print(f"- {text} (距离: {hit.distance})") + except Exception as e: + print(f"查询失败: {e}") +else: + print("未找到相关历史对话,请检查查询参数或数据。") + +# 8. 输出查询耗时 +print(f"查询耗时: {end_time - start_time:.4f} 秒") + +# 9. 释放连接 +milvus_pool.release_connection(connection) + +# 10. 关闭连接池 +milvus_pool.close() \ No newline at end of file diff --git a/AI/WxMini/Milvus/TestMilvusNearWord.py b/AI/WxMini/Milvus/TestMilvusNearWord.py deleted file mode 100644 index a8e9f567..00000000 --- a/AI/WxMini/Milvus/TestMilvusNearWord.py +++ /dev/null @@ -1,94 +0,0 @@ -from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility -import numpy as np -import time - -# 1. 连接 Milvus -connections.connect("default", host="10.10.14.101", port="19530") - -# 2. 定义集合的字段和模式 -fields = [ - FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True), - FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=500), # 存储对话文本 - FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128) # 假设嵌入向量维度为 128 -] -schema = CollectionSchema(fields, description="Conversation history collection") - -# 3. 创建集合 -collection_name = "conversation_history" -if utility.has_collection(collection_name): - utility.drop_collection(collection_name) # 如果集合已存在,先删除 - -collection = Collection(name=collection_name, schema=schema) - -# 4. 创建索引 -index_params = { - "index_type": "IVF_FLAT", # 索引类型 - "metric_type": "L2", # 距离度量方式 - "params": {"nlist": 256} # 增加 nlist 的值 -} -collection.create_index("embedding", index_params) - -# 5. 加载集合 -collection.load() - -# 6. 模拟多次对话 -# 假设历史对话的嵌入向量已经生成 -history = [ - {"text": "我今天心情不太好,因为工作压力很大。", "embedding": np.random.random(128).tolist()}, - {"text": "我最近在学习 Python,感觉很有趣。", "embedding": np.random.random(128).tolist()}, - {"text": "我打算周末去爬山,放松一下。", "embedding": np.random.random(128).tolist()}, - {"text": "我昨天看了一部很棒的电影,推荐给你。", "embedding": np.random.random(128).tolist()} -] - -# 将历史对话插入 Milvus -for item in history: - insert_result = collection.insert([[item["text"]], [item["embedding"]]]) - print(f"插入结果: {insert_result}") - -# 刷新集合 -collection.flush() - -# 调试:查询集合中的所有数据 -print("集合中的数据:") -result = collection.query(expr="", output_fields=["id", "text"], limit=10) -if result: - for item in result: - print(f"ID: {item['id']}, Text: {item['text']}") -else: - print("集合中没有数据,请检查数据插入步骤。") - -# 7. 模拟当前对话 -# 假设当前对话的嵌入向量已经生成 -current_text = "我最近工作压力很大,想找个方式放松一下。" -current_embedding = np.random.random(128).tolist() - -# 8. 查询与当前对话最相关的历史对话 -search_params = { - "metric_type": "L2", - "params": {"nprobe": 100} # 增加 nprobe 的值 -} -start_time = time.time() -results = collection.search( - data=[current_embedding], # 查询向量 - anns_field="embedding", # 查询字段 - param=search_params, - limit=2 # 返回最相似的 2 个结果 -) -end_time = time.time() - -# 9. 输出查询结果 -print("当前对话:", current_text) -print("最相关的历史对话:") -if results: - for hits in results: - for hit in hits: - text = collection.query(expr=f"id == {hit.id}", output_fields=["text"])[0]["text"] - print(f"- {text} (距离: {hit.distance})") -else: - print("未找到相关历史对话,请检查查询参数或数据。") - -# 输出查询耗时 -print(f"查询耗时: {end_time - start_time:.4f} 秒") - -# 10. 关闭连接 -connections.disconnect("default") \ No newline at end of file diff --git a/AI/WxMini/Milvus/Utils/MilvusCollectionManager.py b/AI/WxMini/Milvus/Utils/MilvusCollectionManager.py index 8fd36f40..8894b477 100644 --- a/AI/WxMini/Milvus/Utils/MilvusCollectionManager.py +++ b/AI/WxMini/Milvus/Utils/MilvusCollectionManager.py @@ -1,4 +1,3 @@ -# MilvusCollectionManager.py from pymilvus import Collection, utility, CollectionSchema @@ -34,16 +33,6 @@ class MilvusCollectionManager: self.collection = Collection(name=self.collection_name, schema=schema) print(f"集合 '{self.collection_name}' 创建成功。") - def insert_data(self, entities): - """ - 插入数据 - :param entities: 要插入的数据 - """ - if self.collection is None: - raise Exception("集合未创建,请先调用 create_collection 方法") - self.collection.insert(entities) - print("数据插入成功。") - def create_index(self, field_name, index_params): """ 创建索引 @@ -51,32 +40,62 @@ class MilvusCollectionManager: :param index_params: 索引参数 """ if self.collection is None: - raise Exception("集合未创建,请先调用 create_collection 方法") + raise Exception("集合未加载,请检查集合是否存在。") self.collection.create_index(field_name, index_params) print("索引创建成功。") + def insert_data(self, entities): + """ + 插入数据 + :param entities: 数据实体,格式为 [texts, embeddings] + """ + if self.collection is None: + raise Exception("集合未加载,请检查集合是否存在。") + self.collection.insert(entities) + print("数据插入成功。") def load_collection(self): """ 加载集合到内存 """ if self.collection is None: - raise Exception("集合未创建,请先调用 create_collection 方法") + raise Exception("集合未加载,请检查集合是否存在。") self.collection.load() print("集合已加载到内存。") - def search(self, query_vector, search_params, limit=2): + def search(self, query_embedding, search_params, limit=2): """ 查询数据 - :param query_vector: 查询向量 + :param query_embedding: 查询向量 :param search_params: 查询参数 :param limit: 返回结果数量 :return: 查询结果 """ if self.collection is None: - raise Exception("集合未创建,请先调用 create_collection 方法") + raise Exception("集合未加载,请检查集合是否存在。") return self.collection.search( - data=[query_vector], + data=[query_embedding], anns_field="embedding", param=search_params, limit=limit - ) \ No newline at end of file + ) + + def query_text_by_id(self, id): + """ + 根据 ID 查询对话文本 + :param id: 数据 ID + :return: 对话文本 + """ + if self.collection is None: + raise Exception("集合未加载,请检查集合是否存在。") + + # 检查集合的字段定义 + schema = self.collection.schema + field_names = [field.name for field in schema.fields] + if "text" not in field_names: + raise Exception(f"集合 '{self.collection_name}' 中不存在 'text' 字段,请检查集合定义。") + + result = self.collection.query(expr=f"id == {id}", output_fields=["text"]) + if result: + return result[0]["text"] + else: + return None \ No newline at end of file diff --git a/AI/WxMini/Milvus/Utils/__pycache__/MilvusCollectionManager.cpython-310.pyc b/AI/WxMini/Milvus/Utils/__pycache__/MilvusCollectionManager.cpython-310.pyc index 36fe8f91..a88defee 100644 Binary files a/AI/WxMini/Milvus/Utils/__pycache__/MilvusCollectionManager.cpython-310.pyc and b/AI/WxMini/Milvus/Utils/__pycache__/MilvusCollectionManager.cpython-310.pyc differ