@@ -2,19 +2,26 @@ from WxMini.Milvus.Config.MulvusConfig import *
 from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
 from WxMini.Milvus.Utils.MilvusConnectionPool import *
 from gensim.models import KeyedVectors
 import jieba  # import the jieba word-segmentation library
 
 # Load the pre-trained Word2Vec model
 model_path = "D:/Tencent_AILab_ChineseEmbedding/Tencent_AILab_ChineseEmbedding.txt"  # replace with the path to your Word2Vec model
 model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=10000)
 print(f"Model loaded successfully, word-vector dimension: {model.vector_size}")
 
 # Convert a piece of text into an embedding vector
 def text_to_embedding(text):
-    words = text.split()
+    words = jieba.lcut(text)  # tokenize the text with jieba
+    print(f"Text: {text}, tokens: {words}")
     embeddings = [model[word] for word in words if word in model]
+    print(f"Number of valid word vectors: {len(embeddings)}")
     if embeddings:
-        return sum(embeddings) / len(embeddings)  # return the average of the word vectors
+        avg_embedding = sum(embeddings) / len(embeddings)
+        print(f"Generated average vector: {avg_embedding[:5]}...")  # print the first 5 dimensions
+        return avg_embedding
     else:
-        return [0.0] * model.vector_size  # return a zero vector if the text contains no valid words
+        print("No valid words found, returning a zero vector")
+        return [0.0] * model.vector_size
 
 # 1. Use a connection pool to manage Milvus connections
 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS)
@@ -35,6 +42,10 @@ texts = [
 ]
 embeddings = [text_to_embedding(text) for text in texts]  # generate a vector for each text with the text model
+
+# Print the generated vectors
+for text, embedding in zip(texts, embeddings):
+    print(f"Text: {text}, vector: {embedding[:5]}...")  # print the first 5 dimensions
 
 # 5. Insert the data; make sure the field order matches the collection definition
 entities = [texts, embeddings]  # the first list is the text field, the second is the embedding field
 collection_manager.insert_data(entities)
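
For reference, the element-wise `sum(embeddings) / len(embeddings)` in `text_to_embedding` works because gensim returns NumPy arrays for each word. A minimal standalone sketch of the same averaging step, written with `numpy.mean` and assuming the same `KeyedVectors` model (the function name `average_embedding` is illustrative, not part of this change):

```python
import numpy as np
from gensim.models import KeyedVectors

def average_embedding(words, kv: KeyedVectors) -> np.ndarray:
    """Average the vectors of in-vocabulary words; zero vector if none match."""
    vectors = [kv[w] for w in words if w in kv]   # keep only words the model knows
    if not vectors:
        return np.zeros(kv.vector_size, dtype=np.float32)
    return np.mean(vectors, axis=0)               # element-wise mean over word vectors
```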
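`MilvusCollectionManager.insert_data` is a project wrapper whose body is not part of this diff. Assuming it forwards the column-ordered lists to pymilvus, the equivalent raw call would look roughly like the sketch below; the collection name is hypothetical, and the schema is assumed to declare `text` before `embedding`:

```python
from pymilvus import connections, Collection

# Hypothetical equivalent of collection_manager.insert_data(entities).
connections.connect(host=MS_HOST, port=MS_PORT)   # reuse the configured host/port
collection = Collection("chat_records")           # assumed collection name
collection.insert([texts, embeddings])            # column-based insert; order must match the schema
collection.flush()                                # persist the newly inserted rows
```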