main
HuangHai 4 weeks ago
parent 68c3ee4cb0
commit 0259996601

@@ -36,9 +36,12 @@ for filename in os.listdir(txt_dir):
if filename.endswith('.txt'):
filepath = os.path.join(txt_dir, filename)
with open(filepath, 'r', encoding='utf-8') as f:
content = f.read().strip()
# 只读取第一行作为向量计算
first_line = f.readline().strip()
# 读取全部内容用于后续查询
full_content = first_line + '\n' + f.read()
if not content:
if not first_line:
print(f"跳过空文件: {filename}")
continue
@@ -46,15 +49,15 @@ for filename in os.listdir(txt_dir):
# 5. 获取当前时间和会话ID
timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
tags = {"tags": ["MATH_DATA_1", "小学数学"]} # 直接使用Python字典Milvus会自动转换为JSON
tags = {"tags": ["MATH_DATA_1", "小学数学"], "full_content": full_content} # 添加完整内容
# 6. 将文本转换为嵌入向量
embedding = text_to_embedding(content)
# 6. 将第一行文本转换为嵌入向量
embedding = text_to_embedding(first_line)
# 7. 插入数据
entities = [
[tags], # tags
[content], # user_input
[first_line], # user_input
[timestamp], # timestamp
[embedding] # embedding
]

@@ -59,7 +59,7 @@ results = collection_manager.search(
current_embedding,
search_params,
expr=expr, # 使用in操作符
limit=10
limit=5
)
end_time = time.time()

@@ -99,7 +99,11 @@ async def generate_stream(client, milvus_pool, collection_manager, query):
logger.info(f"ID: {hit.id}")
logger.info(f"标签: {record['tags']}")
logger.info(f"用户问题: {record['user_input']}")
context = context + record['user_input']
# 获取完整内容
full_content = record['tags'].get('full_content', record['user_input'])
context = context + full_content
logger.info(f"时间: {record['timestamp']}")
logger.info(f"距离: {hit.distance}")
logger.info("-" * 40) # 分隔线

Loading…
Cancel
Save