'commit'

4 months ago · 840156e590
parent dde4355238
commit 840156e590
7 changed files with 124 additions and 120 deletions
--- a/AI/WxMini/Milvus/T1_create_collection.py
+++ b/AI/WxMini/Milvus/T1_create_collection.py
@ -3,6 +3,11 @@ from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
 from WxMini.Milvus.Utils.MilvusConnectionPool import *
 from WxMini.Milvus.Config.MulvusConfig import *

+from pymilvus import FieldSchema, DataType, utility
+from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
+from WxMini.Milvus.Utils.MilvusConnectionPool import *
+from WxMini.Milvus.Config.MulvusConfig import *
+
 # 1. 使用连接池管理 Milvus 连接
 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)

@ -22,6 +27,7 @@ if utility.has_collection(collection_name):
 # 5. 定义集合的字段和模式
 fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),  # 主键字段，自动生成 ID
+    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=500),  # 存储对话文本
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128)  # 向量字段，维度为 128
 ]
 schema_description = "Simple demo collection"
--- a/AI/WxMini/Milvus/T3_insert_data.py
+++ b/AI/WxMini/Milvus/T3_insert_data.py
@ -4,6 +4,12 @@ from WxMini.Milvus.Config.MulvusConfig import *
 from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
 from WxMini.Milvus.Utils.MilvusConnectionPool import *

+import random
+
+from WxMini.Milvus.Config.MulvusConfig import *
+from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
+from WxMini.Milvus.Utils.MilvusConnectionPool import *
+
 # 1. 使用连接池管理 Milvus 连接
 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)

@ -15,12 +21,19 @@ collection_name = MS_COLLECTION_NAME
 collection_manager = MilvusCollectionManager(collection_name)

 # 4. 插入数据
-data = [
+texts = [
+    "我今天心情不太好，因为工作压力很大。",  # 第一个对话文本
+    "我最近在学习 Python，感觉很有趣。",   # 第二个对话文本
+    "我打算周末去爬山，放松一下。"         # 第三个对话文本
+]
+embeddings = [
    [random.random() for _ in range(128)],  # 第一个 128 维向量
    [random.random() for _ in range(128)],  # 第二个 128 维向量
    [random.random() for _ in range(128)]   # 第三个 128 维向量
 ]
-entities = [data]  # 插入的数据
+
+# 插入数据，确保字段顺序与集合定义一致
+entities = [texts, embeddings]  # 第一个列表是 text 字段，第二个列表是 embedding 字段
 collection_manager.insert_data(entities)
 print("数据插入成功。")

@ -35,9 +48,13 @@ search_params = {
 }
 results = collection_manager.search(query_vector, search_params, limit=2)
 print("查询结果：")
-for hits in results:
-    for hit in hits:
-        print(f"ID: {hit.id}, Distance: {hit.distance}")
+if results:
+    for hits in results:
+        for hit in hits:
+            text = collection_manager.query_text_by_id(hit.id)
+            print(f"ID: {hit.id}, Text: {text}, Distance: {hit.distance}")
+else:
+    print("未找到相关数据，请检查查询参数或数据。")

 # 7. 释放连接
 milvus_pool.release_connection(connection)
--- a/AI/WxMini/Milvus/T4_search_data.py
+++ b/AI/WxMini/Milvus/T4_search_data.py
@ -26,9 +26,13 @@ search_params = {
 }
 results = collection_manager.search(query_vector, search_params, limit=20)
 print("查询结果：")
-for hits in results:
-    for hit in hits:
-        print(f"ID: {hit.id}, Distance: {hit.distance}")
+if results:
+    for hits in results:
+        for hit in hits:
+            text = collection_manager.query_text_by_id(hit.id)  # 获取 text 字段
+            print(f"ID: {hit.id}, Text: {text}, Distance: {hit.distance}")
+else:
+    print("未找到相关数据，请检查查询参数或数据。")

 # 6. 释放连接
 milvus_pool.release_connection(connection)
--- a/AI/WxMini/Milvus/T5_search_near_word.py
+++ b/AI/WxMini/Milvus/T5_search_near_word.py
@ -0,0 +1,52 @@
+import numpy as np
+import time
+from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
+from WxMini.Milvus.Utils.MilvusConnectionPool import *
+from WxMini.Milvus.Config.MulvusConfig import *
+# 1. 使用连接池管理 Milvus 连接
+milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)
+
+# 2. 从连接池中获取一个连接
+connection = milvus_pool.get_connection()
+
+# 3. 初始化集合管理器
+collection_name = MS_COLLECTION_NAME
+collection_manager = MilvusCollectionManager(collection_name)
+
+# 4. 加载集合到内存
+collection_manager.load_collection()
+
+# 5. 模拟当前对话的嵌入向量
+current_embedding = np.random.random(128).tolist()  # 随机生成一个 128 维向量
+
+# 6. 查询与当前对话最相关的历史对话
+search_params = {
+    "metric_type": "L2",       # 使用 L2 距离度量方式
+    "params": {"nprobe": 100}  # 设置 IVF_FLAT 的 nprobe 参数
+}
+start_time = time.time()
+results = collection_manager.search(current_embedding, search_params, limit=2)
+end_time = time.time()
+
+# 7. 输出查询结果
+print("当前对话的嵌入向量:", current_embedding)
+print("最相关的历史对话:")
+if results:
+    for hits in results:
+        for hit in hits:
+            try:
+                text = collection_manager.query_text_by_id(hit.id)
+                print(f"- {text} (距离: {hit.distance})")
+            except Exception as e:
+                print(f"查询失败: {e}")
+else:
+    print("未找到相关历史对话，请检查查询参数或数据。")
+
+# 8. 输出查询耗时
+print(f"查询耗时: {end_time - start_time:.4f} 秒")
+
+# 9. 释放连接
+milvus_pool.release_connection(connection)
+
+# 10. 关闭连接池
+milvus_pool.close()
--- a/AI/WxMini/Milvus/TestMilvusNearWord.py
+++ b/AI/WxMini/Milvus/TestMilvusNearWord.py
@ -1,94 +0,0 @@
-from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
-import numpy as np
-import time
-
-# 1. 连接 Milvus
-connections.connect("default", host="10.10.14.101", port="19530")
-
-# 2. 定义集合的字段和模式
-fields = [
-    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
-    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=500),  # 存储对话文本
-    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128)  # 假设嵌入向量维度为 128
-]
-schema = CollectionSchema(fields, description="Conversation history collection")
-
-# 3. 创建集合
-collection_name = "conversation_history"
-if utility.has_collection(collection_name):
-    utility.drop_collection(collection_name)  # 如果集合已存在，先删除
-
-collection = Collection(name=collection_name, schema=schema)
-
-# 4. 创建索引
-index_params = {
-    "index_type": "IVF_FLAT",  # 索引类型
-    "metric_type": "L2",       # 距离度量方式
-    "params": {"nlist": 256}   # 增加 nlist 的值
-}
-collection.create_index("embedding", index_params)
-
-# 5. 加载集合
-collection.load()
-
-# 6. 模拟多次对话
-# 假设历史对话的嵌入向量已经生成
-history = [
-    {"text": "我今天心情不太好，因为工作压力很大。", "embedding": np.random.random(128).tolist()},
-    {"text": "我最近在学习 Python，感觉很有趣。", "embedding": np.random.random(128).tolist()},
-    {"text": "我打算周末去爬山，放松一下。", "embedding": np.random.random(128).tolist()},
-    {"text": "我昨天看了一部很棒的电影，推荐给你。", "embedding": np.random.random(128).tolist()}
-]
-
-# 将历史对话插入 Milvus
-for item in history:
-    insert_result = collection.insert([[item["text"]], [item["embedding"]]])
-    print(f"插入结果: {insert_result}")
-
-# 刷新集合
-collection.flush()
-
-# 调试：查询集合中的所有数据
-print("集合中的数据:")
-result = collection.query(expr="", output_fields=["id", "text"], limit=10)
-if result:
-    for item in result:
-        print(f"ID: {item['id']}, Text: {item['text']}")
-else:
-    print("集合中没有数据，请检查数据插入步骤。")
-
-# 7. 模拟当前对话
-# 假设当前对话的嵌入向量已经生成
-current_text = "我最近工作压力很大，想找个方式放松一下。"
-current_embedding = np.random.random(128).tolist()
-
-# 8. 查询与当前对话最相关的历史对话
-search_params = {
-    "metric_type": "L2",
-    "params": {"nprobe": 100}  # 增加 nprobe 的值
-}
-start_time = time.time()
-results = collection.search(
-    data=[current_embedding],  # 查询向量
-    anns_field="embedding",    # 查询字段
-    param=search_params,
-    limit=2  # 返回最相似的 2 个结果
-)
-end_time = time.time()
-
-# 9. 输出查询结果
-print("当前对话:", current_text)
-print("最相关的历史对话:")
-if results:
-    for hits in results:
-        for hit in hits:
-            text = collection.query(expr=f"id == {hit.id}", output_fields=["text"])[0]["text"]
-            print(f"- {text} (距离: {hit.distance})")
-else:
-    print("未找到相关历史对话，请检查查询参数或数据。")
-
-# 输出查询耗时
-print(f"查询耗时: {end_time - start_time:.4f} 秒")
-
-# 10. 关闭连接
-connections.disconnect("default")
--- a/AI/WxMini/Milvus/Utils/MilvusCollectionManager.py
+++ b/AI/WxMini/Milvus/Utils/MilvusCollectionManager.py
@ -1,4 +1,3 @@
-# MilvusCollectionManager.py
 from pymilvus import Collection, utility, CollectionSchema


@ -34,16 +33,6 @@ class MilvusCollectionManager:
        self.collection = Collection(name=self.collection_name, schema=schema)
        print(f"集合 '{self.collection_name}' 创建成功。")

-    def insert_data(self, entities):
-        """
-        插入数据
-        :param entities: 要插入的数据
-        """
-        if self.collection is None:
-            raise Exception("集合未创建，请先调用 create_collection 方法")
-        self.collection.insert(entities)
-        print("数据插入成功。")
-
    def create_index(self, field_name, index_params):
        """
        创建索引
@ -51,32 +40,62 @@ class MilvusCollectionManager:
        :param index_params: 索引参数
        """
        if self.collection is None:
-            raise Exception("集合未创建，请先调用 create_collection 方法")
+            raise Exception("集合未加载，请检查集合是否存在。")
        self.collection.create_index(field_name, index_params)
        print("索引创建成功。")

+    def insert_data(self, entities):
+        """
+        插入数据
+        :param entities: 数据实体，格式为 [texts, embeddings]
+        """
+        if self.collection is None:
+            raise Exception("集合未加载，请检查集合是否存在。")
+        self.collection.insert(entities)
+        print("数据插入成功。")
    def load_collection(self):
        """
        加载集合到内存
        """
        if self.collection is None:
-            raise Exception("集合未创建，请先调用 create_collection 方法")
+            raise Exception("集合未加载，请检查集合是否存在。")
        self.collection.load()
        print("集合已加载到内存。")

-    def search(self, query_vector, search_params, limit=2):
+    def search(self, query_embedding, search_params, limit=2):
        """
        查询数据
-        :param query_vector: 查询向量
+        :param query_embedding: 查询向量
        :param search_params: 查询参数
        :param limit: 返回结果数量
        :return: 查询结果
        """
        if self.collection is None:
-            raise Exception("集合未创建，请先调用 create_collection 方法")
+            raise Exception("集合未加载，请检查集合是否存在。")
        return self.collection.search(
-            data=[query_vector],
+            data=[query_embedding],
            anns_field="embedding",
            param=search_params,
            limit=limit
-        )
+        )
+
+    def query_text_by_id(self, id):
+        """
+        根据 ID 查询对话文本
+        :param id: 数据 ID
+        :return: 对话文本
+        """
+        if self.collection is None:
+            raise Exception("集合未加载，请检查集合是否存在。")
+
+        # 检查集合的字段定义
+        schema = self.collection.schema
+        field_names = [field.name for field in schema.fields]
+        if "text" not in field_names:
+            raise Exception(f"集合 '{self.collection_name}' 中不存在 'text' 字段，请检查集合定义。")
+
+        result = self.collection.query(expr=f"id == {id}", output_fields=["text"])
+        if result:
+            return result[0]["text"]
+        else:
+            return None
--- a/AI/WxMini/Milvus/Utils/pycache/MilvusCollectionManager.cpython-310.pyc
+++ b/AI/WxMini/Milvus/Utils/pycache/MilvusCollectionManager.cpython-310.pyc