|
|
|
@ -36,9 +36,12 @@ for filename in os.listdir(txt_dir):
|
|
|
|
|
if filename.endswith('.txt'):
|
|
|
|
|
filepath = os.path.join(txt_dir, filename)
|
|
|
|
|
with open(filepath, 'r', encoding='utf-8') as f:
|
|
|
|
|
content = f.read().strip()
|
|
|
|
|
# 只读取第一行作为向量计算
|
|
|
|
|
first_line = f.readline().strip()
|
|
|
|
|
# 读取全部内容用于后续查询
|
|
|
|
|
full_content = first_line + '\n' + f.read()
|
|
|
|
|
|
|
|
|
|
if not content:
|
|
|
|
|
if not first_line:
|
|
|
|
|
print(f"跳过空文件: {filename}")
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
@ -46,15 +49,15 @@ for filename in os.listdir(txt_dir):
|
|
|
|
|
|
|
|
|
|
# 5. Build the record metadata: the current local time and the tag payload.
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
# A plain Python dict is used here; the complete file text rides along under
# "full_content" so it can be retrieved by later queries.
# NOTE(review): the whole file is stored inside this JSON value; confirm that
# large files stay within the JSON field size limit of the collection.
tags = {"tags": ["MATH_DATA_1", "小学数学"], "full_content": full_content}

# 6. Convert only the first line of the file into an embedding vector.
embedding = text_to_embedding(first_line)

# 7. Assemble the data to insert: one single-element list per field.
# NOTE(review): the order presumably has to match the collection schema
# (tags, user_input, timestamp, embedding) - verify against the schema.
entities = [
    [tags],        # tags
    [first_line],  # user_input
    [timestamp],   # timestamp
    [embedding],   # embedding
]
|
|
|
|
|