main
HuangHai 4 weeks ago
parent 15b6c65325
commit 39e1f9d4f7

@ -26,11 +26,11 @@ if utility.has_collection(collection_name):
# 5. 定义集合的字段和模式
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True), # 主键字段,自动生成 ID
FieldSchema(name="document_id", dtype=DataType.VARCHAR, max_length=64), # 文档 ID
FieldSchema(name="user_input", dtype=DataType.VARCHAR, max_length=65535), # 用户问题
FieldSchema(name="timestamp", dtype=DataType.VARCHAR, max_length=32), # 时间
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=MS_DIMENSION) # 向量字段,维度为 200
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
FieldSchema(name="tags", dtype=DataType.JSON), # 改为JSON类型存储多个标签
FieldSchema(name="user_input", dtype=DataType.VARCHAR, max_length=65535),
FieldSchema(name="timestamp", dtype=DataType.VARCHAR, max_length=32),
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=MS_DIMENSION)
]
# Human-readable description of the collection; it names the non-vector
# fields declared in `fields` (tags, user_input, timestamp).
schema_description = "Chat records collection with tags, user_input, and timestamp"

@ -46,14 +46,14 @@ for filename in os.listdir(txt_dir):
# 5. Get the current timestamp and build the tags metadata
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
document_id = "MATH_DATA_1" # 史校长的这本书定义为 MATH_DATA_1
tags = {"tags": ["MATH_DATA_1", "小学数学"]} # 直接使用Python字典Milvus会自动转换为JSON
# 6. 将文本转换为嵌入向量
embedding = text_to_embedding(content)
# 7. 插入数据
entities = [
[document_id], # document_id
[tags], # tags
[content], # user_input
[timestamp], # timestamp
[embedding] # embedding

@ -21,7 +21,7 @@ try:
# 使用 Milvus 的 query 方法查询所有数据
results = collection_manager.collection.query(
expr="", # 空表达式表示查询所有数据
output_fields=["id", "document_id", "user_input", "timestamp", "embedding"], # 指定返回的字段
output_fields=["id", "tags", "user_input", "timestamp", "embedding"], # 指定返回的字段
limit=1000 # 设置最大返回记录数
)
print("查询结果:")
@ -29,13 +29,13 @@ try:
for result in results:
try:
# 获取字段值
document_id = result["document_id"]
tags = result["tags"]
user_input = result["user_input"]
timestamp = result["timestamp"]
embedding = result["embedding"]
# 打印结果
print(f"ID: {result['id']}")
print(f"文档 ID: {document_id}")
print(f"标签: {tags}")
print(f"用户问题: {user_input}")
print(f"时间: {timestamp}")
print(f"向量: {embedding[:5]}...") # 只打印前 5 维向量

@ -55,7 +55,7 @@ search_params = {
# Restrict the vector search to records that carry the wanted tag, so the
# caller decides which documents take part in the search.
# tags['tags'] holds a JSON array of tag strings, so membership has to be
# tested with json_contains; an == comparison against a single string does
# not match an array value.
# To accept any one of several tags, use for example:
#   json_contains_any(tags['tags'], ['MATH_DATA_1', 'MATH_DATA_2'])
expr = "json_contains(tags['tags'], 'MATH_DATA_1')"
results = collection_manager.search(
current_embedding,
search_params,
@ -74,7 +74,7 @@ if results:
# 查询非向量字段
record = collection_manager.query_by_id(hit.id)
print(f"ID: {hit.id}")
print(f"文档 ID: {record['document_id']}")
print(f"标签: {record['tags']}")
print(f"用户问题: {record['user_input']}")
print(f"时间: {record['timestamp']}")
print(f"距离: {hit.distance}")

Loading…
Cancel
Save