Files
YunNanProject/Rag/T1_Train.py
2025-09-12 21:57:36 +08:00

56 lines
1.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import asyncio
import logging
import os
from Util.LightRagUtil import initialize_rag
# Set to True to delete the existing storage files and regenerate them from scratch.
IS_CLEAR = False

# Finer-grained control over log output: records from the 'lightrag' logger at
# INFO level and above go to a stream handler with a timestamped format.
handler = logging.StreamHandler()
handler.setFormatter(
    logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
)
logger = logging.getLogger('lightrag')
logger.addHandler(handler)
logger.setLevel(logging.INFO)
async def main():
    """Insert the contents of ``Txt/YunNan.txt`` into the RAG store.

    When ``IS_CLEAR`` is true, the listed storage files inside
    ``WORKING_DIR`` are removed first so they are regenerated. The RAG
    instance is obtained from ``initialize_rag(WORKING_DIR)``, the text file
    is read as UTF-8 and passed to ``rag.ainsert``, and the storages are
    finalized afterwards.

    Any exception raised during initialisation, reading, or insertion is
    logged with its traceback and not re-raised.
    """
    # Optionally wipe previously generated storage files.
    if IS_CLEAR:
        files_to_delete = [
            "graph_chunk_entity_relation.graphml",
            "kv_store_doc_status.json",
            "kv_store_full_docs.json",
            "kv_store_text_chunks.json",
            "vdb_chunks.json",
            "vdb_entities.json",
            "vdb_relationships.json",
        ]
        # Delete each file that is actually present.
        for file in files_to_delete:
            file_path = os.path.join(WORKING_DIR, file)
            if os.path.exists(file_path):
                os.remove(file_path)
                logger.info(f"删除的文件:: {file_path}")

    # Bind the name before entering the try block: if initialize_rag() raises,
    # the finally clause must be able to tell that there is nothing to
    # finalize instead of failing on an unbound local.
    rag = None
    try:
        # Note from the original author: the default setup uses NetworkX.
        rag = await initialize_rag(WORKING_DIR)
        file_path = 'Txt/YunNan.txt'
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()
        await rag.ainsert(content, file_paths=[file_path])
        logger.info(f"Inserted content from {file_path}")
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception(f"An error occurred: {e}")
    finally:
        if rag is not None:
            await rag.finalize_storages()
# Working directory used by main() for the storage files and passed to
# initialize_rag(). It is assigned after main() is defined but before main()
# is called, so the name is resolved when main() actually runs.
WORKING_DIR = "./KB"
# Run the ingestion coroutine to completion.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module also triggers the run — confirm that is intended.
asyncio.run(main())