main
HuangHai 4 weeks ago
parent 956fd1b092
commit d75c683443

@ -27,13 +27,12 @@ if utility.has_collection(collection_name):
# 5. Define the collection's fields and schema
# NOTE(review): this fragment looks like it was taken from a rendered diff, so
# lines from both the old and the new revision may be shown together (both
# person_id and document_id, and two schema_description assignments) — confirm
# against the actual file.
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True), # Primary key field; the ID is generated automatically
FieldSchema(name="person_id", dtype=DataType.VARCHAR, max_length=64), # Session ID
FieldSchema(name="document_id", dtype=DataType.VARCHAR, max_length=64), # Document ID
FieldSchema(name="user_input", dtype=DataType.VARCHAR, max_length=65535), # User question
FieldSchema(name="model_response", dtype=DataType.VARCHAR, max_length=65535), # Response returned by the large model
FieldSchema(name="timestamp", dtype=DataType.VARCHAR, max_length=32), # Time
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=MS_DIMENSION) # Vector field; dimension comes from MS_DIMENSION (the original comment said 200 — TODO confirm)
]
schema_description = "Chat records collection with person_id , user input, model response, and timestamp"
# As written, this second assignment overwrites the description set on the previous line.
schema_description = "Chat records collection with document_id , user_input, and timestamp"
# 6. Create the collection
print(f"正在创建集合 '{collection_name}'...")

@ -46,16 +46,15 @@ for filename in os.listdir(txt_dir):
# 5. Get the current time and the session ID
# Local time formatted as "YYYY-MM-DD HH:MM:SS".
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# NOTE(review): built-in hash() of a str is salted per process by default
# (see PYTHONHASHSEED), so this ID is not stable across runs — confirm that is acceptable.
person_id = "MATH_DATA_" + str(hash(filename))
document_id = "MATH_DATA_1" # Principal Shi's book is assigned the ID MATH_DATA_1
# 6. Convert the text into an embedding vector
embedding = text_to_embedding(content)
# 7. Insert the data
# One single-element list per field, i.e. a single record.
# NOTE(review): presumably the order must match the non-auto-ID fields of the
# collection schema; both person_id and document_id appear here — verify
# against the schema actually in use.
entities = [
[person_id], # person_id
[document_id], # document_id
[content], # user_input
[""], # model_response (left empty)
[timestamp], # timestamp
[embedding] # embedding
]

Binary file not shown.
Loading…
Cancel
Save