QingLong/AI/WxMini/Milvus/T1.py

# 安装 pymilvus 库（如果尚未安装）
# cd D:\anaconda3\envs\py310\Scripts
# pip install pymilvus DBUtils

# 导入必要的模块
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
from queue import Queue
import threading
import random
from MulvusConfig import *

# 1. 手动实现 Milvus 连接池
class MilvusConnectionPool:
    def __init__(self, host, port, max_connections=5):
        """
        初始化 Milvus 连接池
        :param host: Milvus 主机地址
        :param port: Milvus 端口
        :param max_connections: 最大连接数
        """
        self.host = host
        self.port = port
        self.max_connections = max_connections
        self._pool = Queue(max_connections)
        self._lock = threading.Lock()

        # 初始化连接池
        for _ in range(max_connections):
            self._pool.put(self._create_connection())

    def _create_connection(self):
        """
        创建一个新的 Milvus 连接
        :return: Milvus 连接对象
        """
        return connections.connect(host=self.host, port=self.port, alias="default")

    def get_connection(self):
        """
        从连接池中获取一个连接
        :return: Milvus 连接对象
        """
        with self._lock:
            if not self._pool.empty():
                return self._pool.get()
            else:
                raise Exception("连接池已满，无法获取连接")

    def release_connection(self, connection):
        """
        释放连接，将其放回连接池
        :param connection: Milvus 连接对象
        """
        with self._lock:
            if self._pool.qsize() < self.max_connections:
                self._pool.put(connection)
            else:
                connections.disconnect("default")

    def close(self):
        """
        关闭连接池，释放所有连接
        """
        with self._lock:
            while not self._pool.empty():
                connection = self._pool.get()
                connections.disconnect("default")

# 2. 使用连接池管理 Milvus 连接
# 创建 Milvus 连接池，指定主机地址和端口
milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)

# 从连接池中获取一个连接
connection = milvus_pool.get_connection()

# 3. 定义集合的字段和模式
# 创建字段列表，定义集合的结构
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),  # 主键字段，自动生成 ID
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128)  # 向量字段，维度为 128
]
# 创建集合模式，包含字段列表和描述
schema = CollectionSchema(fields, description="Simple demo collection")

# 4. 创建集合
# 定义集合名称
collection_name = "demo_collection"
# 检查集合是否已存在，如果存在则删除
if utility.has_collection(collection_name):
    utility.drop_collection(collection_name)  # 删除已存在的集合
# 使用集合模式和名称创建新集合
collection = Collection(name=collection_name, schema=schema)

# 5. 插入数据
# 生成随机向量数据，模拟插入的向量
data = [
    [random.random() for _ in range(128)],  # 第一个 128 维向量
    [random.random() for _ in range(128)],  # 第二个 128 维向量
    [random.random() for _ in range(128)]   # 第三个 128 维向量
]
# 将数据包装为实体列表
entities = [data]  # 插入的数据
# 将数据插入集合
collection.insert(entities)

# 6. 创建索引
# 定义索引参数
index_params = {
    "index_type": "IVF_FLAT",  # 使用 IVF_FLAT 索引类型
    "metric_type": "L2",       # 使用 L2 距离度量方式
    "params": {"nlist": 128}   # 设置 IVF_FLAT 的 nlist 参数
}
# 在向量字段上创建索引
collection.create_index("embedding", index_params)

# 7. 加载集合到内存
# 将集合加载到内存中，以便进行查询
collection.load()

# 8. 查询数据
# 生成一个随机查询向量
query_vector = [random.random() for _ in range(128)]  # 查询向量
# 定义查询参数
search_params = {
    "metric_type": "L2",       # 使用 L2 距离度量方式
    "params": {"nprobe": 10}   # 设置 IVF_FLAT 的 nprobe 参数
}
# 执行查询
results = collection.search(
    data=[query_vector],  # 查询向量
    anns_field="embedding",  # 查询的向量字段
    param=search_params,  # 查询参数
    limit=2  # 返回最相似的 2 个结果
)

# 9. 输出查询结果
# 遍历查询结果，输出每个结果的 ID 和距离
for hits in results:
    for hit in hits:
        print(f"ID: {hit.id}, Distance: {hit.distance}")

# 10. 释放连接
# 将连接放回连接池
milvus_pool.release_connection(connection)

# 11. 关闭连接池
# 关闭 Milvus 连接池
milvus_pool.close()