'commit'

4 months ago · 795ba9028f
parent 99fa38f1fe
commit 795ba9028f
5 changed files with 160 additions and 61 deletions
--- a/AI/WxMini/Milvus/MulvusConfig.py
+++ b/AI/WxMini/Milvus/MulvusConfig.py
@ -0,0 +1,12 @@
 MS_HOST = "10.10.14.101"
 MS_PORT = "19530"
 MS_COLLECTION_NAME = "ds_collection"
 MS_MAX_CONNECTIONS = 5
 # "dimension": 128
 # "index_file_size": 1024
 # "metric_type": "L2"
 # "nlist": 1024
 # "nprobe": 16
 # "topk": 10
 # "batch_size": 100
 # "index_type": "IVF_FLAT"
--- a/AI/WxMini/Milvus/T1.py
+++ b/AI/WxMini/Milvus/T1.py
@ -0,0 +1,148 @@
 # 安装 pymilvus 库（如果尚未安装）
 # cd D:\anaconda3\envs\py310\Scripts
 # pip install pymilvus DBUtils
 # 导入必要的模块
 from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
 from queue import Queue
 import threading
 import random
 from MulvusConfig import *
 # 1. 手动实现 Milvus 连接池
 class MilvusConnectionPool:
    def __init__(self, host, port, max_connections=5):
        """
        初始化 Milvus 连接池
        :param host: Milvus 主机地址
        :param port: Milvus 端口
        :param max_connections: 最大连接数
        """
        self.host = host
        self.port = port
        self.max_connections = max_connections
        self._pool = Queue(max_connections)
        self._lock = threading.Lock()
        # 初始化连接池
        for _ in range(max_connections):
            self._pool.put(self._create_connection())
    def _create_connection(self):
        """
        创建一个新的 Milvus 连接
        :return: Milvus 连接对象
        """
        return connections.connect(host=self.host, port=self.port, alias="default")
    def get_connection(self):
        """
        从连接池中获取一个连接
        :return: Milvus 连接对象
        """
        with self._lock:
            if not self._pool.empty():
                return self._pool.get()
            else:
                raise Exception("连接池已满，无法获取连接")
    def release_connection(self, connection):
        """
        释放连接，将其放回连接池
        :param connection: Milvus 连接对象
        """
        with self._lock:
            if self._pool.qsize() < self.max_connections:
                self._pool.put(connection)
            else:
                connections.disconnect("default")
    def close(self):
        """
        关闭连接池，释放所有连接
        """
        with self._lock:
            while not self._pool.empty():
                connection = self._pool.get()
                connections.disconnect("default")
 # 2. 使用连接池管理 Milvus 连接
 # 创建 Milvus 连接池，指定主机地址和端口
 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)
 # 从连接池中获取一个连接
 connection = milvus_pool.get_connection()
 # 3. 定义集合的字段和模式
 # 创建字段列表，定义集合的结构
 fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),  # 主键字段，自动生成 ID
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128)  # 向量字段，维度为 128
 ]
 # 创建集合模式，包含字段列表和描述
 schema = CollectionSchema(fields, description="Simple demo collection")
 # 4. 创建集合
 # 定义集合名称
 collection_name = "demo_collection"
 # 检查集合是否已存在，如果存在则删除
 if utility.has_collection(collection_name):
    utility.drop_collection(collection_name)  # 删除已存在的集合
 # 使用集合模式和名称创建新集合
 collection = Collection(name=collection_name, schema=schema)
 # 5. 插入数据
 # 生成随机向量数据，模拟插入的向量
 data = [
    [random.random() for _ in range(128)],  # 第一个 128 维向量
    [random.random() for _ in range(128)],  # 第二个 128 维向量
    [random.random() for _ in range(128)]   # 第三个 128 维向量
 ]
 # 将数据包装为实体列表
 entities = [data]  # 插入的数据
 # 将数据插入集合
 collection.insert(entities)
 # 6. 创建索引
 # 定义索引参数
 index_params = {
    "index_type": "IVF_FLAT",  # 使用 IVF_FLAT 索引类型
    "metric_type": "L2",       # 使用 L2 距离度量方式
    "params": {"nlist": 128}   # 设置 IVF_FLAT 的 nlist 参数
 }
 # 在向量字段上创建索引
 collection.create_index("embedding", index_params)
 # 7. 加载集合到内存
 # 将集合加载到内存中，以便进行查询
 collection.load()
 # 8. 查询数据
 # 生成一个随机查询向量
 query_vector = [random.random() for _ in range(128)]  # 查询向量
 # 定义查询参数
 search_params = {
    "metric_type": "L2",       # 使用 L2 距离度量方式
    "params": {"nprobe": 10}   # 设置 IVF_FLAT 的 nprobe 参数
 }
 # 执行查询
 results = collection.search(
    data=[query_vector],  # 查询向量
    anns_field="embedding",  # 查询的向量字段
    param=search_params,  # 查询参数
    limit=2  # 返回最相似的 2 个结果
 )
 # 9. 输出查询结果
 # 遍历查询结果，输出每个结果的 ID 和距离
 for hits in results:
    for hit in hits:
        print(f"ID: {hit.id}, Distance: {hit.distance}")
 # 10. 释放连接
 # 将连接放回连接池
 milvus_pool.release_connection(connection)
 # 11. 关闭连接池
 # 关闭 Milvus 连接池
 milvus_pool.close()
--- a/AI/WxMini/Milvus/TestMilvusNearWord.py
+++ b/AI/WxMini/Milvus/TestMilvusNearWord.py
--- a/AI/WxMini/Milvus/pycache/MulvusConfig.cpython-310.pyc
+++ b/AI/WxMini/Milvus/pycache/MulvusConfig.cpython-310.pyc
--- a/AI/WxMini/TestMilvusBase.py
+++ b/AI/WxMini/TestMilvusBase.py
@ -1,61 +0,0 @@
 # D:\anaconda3\envs\py310\Scripts\pip.exe install pymilvus
 from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
 # 1. 连接 Milvus
 connections.connect("default", host="10.10.14.101", port="19530")
 # 2. 定义集合的字段和模式
 fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=128)
 ]
 schema = CollectionSchema(fields, description="Simple demo collection")
 # 3. 创建集合
 collection_name = "demo_collection"
 if utility.has_collection(collection_name):
    utility.drop_collection(collection_name)  # 如果集合已存在，先删除
 collection = Collection(name=collection_name, schema=schema)
 # 4. 插入数据
 import random
 data = [
    [random.random() for _ in range(128)],  # 第一个向量
    [random.random() for _ in range(128)],  # 第二个向量
    [random.random() for _ in range(128)]   # 第三个向量
 ]
 entities = [data]  # 插入的数据
 collection.insert(entities)
 # 5. 创建索引
 index_params = {
    "index_type": "IVF_FLAT",  # 索引类型
    "metric_type": "L2",       # 距离度量方式
    "params": {"nlist": 128}   # 索引参数
 }
 collection.create_index("embedding", index_params)
 # 6. 加载集合到内存
 collection.load()
 # 7. 查询数据
 query_vector = [random.random() for _ in range(128)]  # 查询向量
 search_params = {
    "metric_type": "L2",
    "params": {"nprobe": 10}  # 查询参数
 }
 results = collection.search(
    data=[query_vector],  # 查询向量
    anns_field="embedding",  # 查询字段
    param=search_params,
    limit=2  # 返回最相似的 2 个结果
 )
 # 8. 输出查询结果
 for hits in results:
    for hit in hits:
        print(f"ID: {hit.id}, Distance: {hit.distance}")
 # 9. 关闭连接
 connections.disconnect("default")