You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

68 lines
2.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import asyncio
import logging
import os
from Util.DocxUtil import get_docx_content_by_pandoc
from Util.LightRagUtil import initialize_rag
# Whether to wipe the existing storage files and regenerate them from scratch.
IS_CLEAR = True

# Finer-grained control over log output: the 'lightrag' logger gets its own
# stream handler with a timestamped format.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(_LOG_FORMAT))

logger = logging.getLogger('lightrag')
logger.setLevel(logging.INFO)
logger.addHandler(handler)
async def main():
    """Rebuild the RAG storage for the subject currently selected by the script.

    Reads the module-level names ``IS_CLEAR``, ``WORKING_DIR``, ``docx_file``
    and ``KEMU``; the driver loop at the bottom of the file binds the last
    three before each call.

    Steps:
      1. If ``IS_CLEAR`` is true, remove the listed storage files from
         ``WORKING_DIR`` (files that do not exist are skipped).
      2. Create a RAG instance via ``initialize_rag(WORKING_DIR)``.
      3. For every entry in ``docx_file`` whose name starts with ``KEMU``,
         insert the text returned by ``get_docx_content_by_pandoc``.

    Any exception raised during steps 2-3 is logged and not propagated.
    Storages are finalized only if the RAG instance was actually created.
    """
    # Clear previously generated files.
    if IS_CLEAR:
        # Comment out or delete the following cleanup code to keep old data.
        files_to_delete = [
            "graph_chunk_entity_relation.graphml",
            "kv_store_doc_status.json",
            "kv_store_full_docs.json",
            "kv_store_text_chunks.json",
            "vdb_chunks.json",
            "vdb_entities.json",
            "vdb_relationships.json",
        ]
        # Delete the files.
        for file in files_to_delete:
            file_path = os.path.join(WORKING_DIR, file)
            if os.path.exists(file_path):
                os.remove(file_path)
                logger.info(f"删除的文件:: {file_path}")

    # Pre-bind so the ``finally`` clause can tell whether initialization
    # succeeded; otherwise a failure in initialize_rag() would surface as an
    # UnboundLocalError that hides the real error.
    rag = None
    try:
        # Note: the default settings use NetworkX.
        rag = await initialize_rag(WORKING_DIR)
        # Walk the docx_file directory and pick every file starting with KEMU.
        for filename in os.listdir(docx_file):
            if not filename.startswith(KEMU):
                continue
            file_path = os.path.join(docx_file, filename)
            # Fetch the content of the docx file.
            content = get_docx_content_by_pandoc(file_path)
            await rag.ainsert(content, file_paths=[filename])
            logger.info(f"Inserted content from {filename}")
    except Exception as e:
        # Top-level boundary of the script: log with traceback, do not re-raise.
        logger.error(f"An error occurred: {e}", exc_info=True)
    finally:
        if rag is not None:
            await rag.finalize_storages()
# Subjects to process. Alternative (currently disabled) list:
# ['JiHe', 'Math', 'SuShi', 'Chemistry', 'ShiJi', 'ChangChun']
KEMUS = ['ShiJi']

for KEMU in KEMUS:
    # main() reads KEMU, WORKING_DIR and docx_file as module-level names,
    # so they are (re)bound here before every run.
    docx_file = 'static/Txt/'
    # Assemble the working directory path for this subject.
    WORKING_DIR = f"./Topic/{KEMU}"
    asyncio.run(main())