main
HuangHai 1 month ago
parent 956fd1b092
commit d75c683443

@ -27,13 +27,12 @@ if utility.has_collection(collection_name):
# 5. 定义集合的字段和模式 # 5. 定义集合的字段和模式
fields = [ fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True), # 主键字段,自动生成 ID FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True), # 主键字段,自动生成 ID
FieldSchema(name="person_id", dtype=DataType.VARCHAR, max_length=64), # 会话 ID FieldSchema(name="document_id", dtype=DataType.VARCHAR, max_length=64), # 文档 ID
FieldSchema(name="user_input", dtype=DataType.VARCHAR, max_length=65535), # 用户问题 FieldSchema(name="user_input", dtype=DataType.VARCHAR, max_length=65535), # 用户问题
FieldSchema(name="model_response", dtype=DataType.VARCHAR, max_length=65535), # 大模型反馈结果
FieldSchema(name="timestamp", dtype=DataType.VARCHAR, max_length=32), # 时间 FieldSchema(name="timestamp", dtype=DataType.VARCHAR, max_length=32), # 时间
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=MS_DIMENSION) # 向量字段,维度为 200 FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=MS_DIMENSION) # 向量字段,维度为 200
] ]
schema_description = "Chat records collection with person_id , user input, model response, and timestamp" schema_description = "Chat records collection with document_id , user_input, and timestamp"
# 6. 创建集合 # 6. 创建集合
print(f"正在创建集合 '{collection_name}'...") print(f"正在创建集合 '{collection_name}'...")

@ -46,16 +46,15 @@ for filename in os.listdir(txt_dir):
# 5. 获取当前时间和会话ID # 5. 获取当前时间和会话ID
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
person_id = "MATH_DATA_" + str(hash(filename)) document_id = "MATH_DATA_1" # 史校长的这本书定义为 MATH_DATA_1
# 6. 将文本转换为嵌入向量 # 6. 将文本转换为嵌入向量
embedding = text_to_embedding(content) embedding = text_to_embedding(content)
# 7. 插入数据 # 7. 插入数据
entities = [ entities = [
[person_id], # person_id [document_id], # document_id
[content], # user_input [content], # user_input
[""], # model_response (留空)
[timestamp], # timestamp [timestamp], # timestamp
[embedding] # embedding [embedding] # embedding
] ]

Binary file not shown.
Loading…
Cancel
Save