From 0259996601163f1d36a4dfaa4b34e679ac0b7ac5 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Thu, 26 Jun 2025 16:48:09 +0800 Subject: [PATCH] 'commit' --- dsRag/Milvus/X4_InsertData.py | 15 +++++++++------ dsRag/Milvus/X6_search_near_data.py | 2 +- dsRag/Start.py | 6 +++++- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/dsRag/Milvus/X4_InsertData.py b/dsRag/Milvus/X4_InsertData.py index 230527c5..ba0e832f 100644 --- a/dsRag/Milvus/X4_InsertData.py +++ b/dsRag/Milvus/X4_InsertData.py @@ -36,9 +36,12 @@ for filename in os.listdir(txt_dir): if filename.endswith('.txt'): filepath = os.path.join(txt_dir, filename) with open(filepath, 'r', encoding='utf-8') as f: - content = f.read().strip() + # 只读取第一行作为向量计算 + first_line = f.readline().strip() + # 读取全部内容用于后续查询 + full_content = first_line + '\n' + f.read() - if not content: + if not first_line: print(f"跳过空文件: {filename}") continue @@ -46,15 +49,15 @@ for filename in os.listdir(txt_dir): # 5. 获取当前时间和会话ID timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - tags = {"tags": ["MATH_DATA_1", "小学数学"]} # 直接使用Python字典,Milvus会自动转换为JSON + tags = {"tags": ["MATH_DATA_1", "小学数学"], "full_content": full_content} # 添加完整内容 - # 6. 将文本转换为嵌入向量 - embedding = text_to_embedding(content) + # 6. 将第一行文本转换为嵌入向量 + embedding = text_to_embedding(first_line) # 7. 插入数据 entities = [ [tags], # tags - [content], # user_input + [first_line], # user_input [timestamp], # timestamp [embedding] # embedding ] diff --git a/dsRag/Milvus/X6_search_near_data.py b/dsRag/Milvus/X6_search_near_data.py index 0f10f2bb..fae5615c 100644 --- a/dsRag/Milvus/X6_search_near_data.py +++ b/dsRag/Milvus/X6_search_near_data.py @@ -59,7 +59,7 @@ results = collection_manager.search( current_embedding, search_params, expr=expr, # 使用in操作符 - limit=10 + limit=5 ) end_time = time.time() diff --git a/dsRag/Start.py b/dsRag/Start.py index c0943113..7cc563e1 100644 --- a/dsRag/Start.py +++ b/dsRag/Start.py @@ -99,7 +99,11 @@ async def generate_stream(client, milvus_pool, collection_manager, query): logger.info(f"ID: {hit.id}") logger.info(f"标签: {record['tags']}") logger.info(f"用户问题: {record['user_input']}") - context = context + record['user_input'] + + # 获取完整内容 + full_content = record['tags'].get('full_content', record['user_input']) + context = context + full_content + logger.info(f"时间: {record['timestamp']}") logger.info(f"距离: {hit.distance}") logger.info("-" * 40) # 分隔线