"""Insert the docx files of one subject into that subject's RAG working directory.

Files in ``docx_file`` whose names start with ``KEMU`` are converted with
``get_docx_content_by_pandoc`` and inserted through ``rag.ainsert``.
"""

import asyncio
import logging
import os

from Util.DocxUtil import get_docx_content_by_pandoc
from Util.LightRagUtil import configure_logging, initialize_rag

# Subject key; it selects both the working directory and the input files.
# Other values used so far: JiHe, Math, SuShi, Chemistry, ShiJi, ChangChun
KEMU = 'ChuZhongShuXue'

# Assemble the file paths.
WORKING_DIR = "./Topic/" + KEMU
docx_file = 'static/Txt/'

# Whether to delete the existing storage files and regenerate from scratch.
IS_CLEAR = False

# Finer-grained control over log output: give the 'lightrag' logger its own
# stream handler with a timestamped format at INFO level.
logger = logging.getLogger('lightrag')
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
logger.addHandler(handler)
async def main():
    """Insert every docx file for ``KEMU`` into the RAG store in ``WORKING_DIR``.

    When ``IS_CLEAR`` is true, the known storage files in ``WORKING_DIR`` are
    deleted first so the store is rebuilt from scratch. Each file in
    ``docx_file`` whose name starts with ``KEMU`` is then converted with
    ``get_docx_content_by_pandoc`` and inserted via ``rag.ainsert``.

    Any exception raised during initialization or insertion is logged and
    swallowed, which also stops processing of the remaining files. The
    storages are finalized only if the RAG instance was actually created.
    """
    # Clear the existing storage files.
    if IS_CLEAR:
        # Comment out or delete the cleanup code below to keep existing data.
        files_to_delete = [
            "graph_chunk_entity_relation.graphml",
            "kv_store_doc_status.json",
            "kv_store_full_docs.json",
            "kv_store_text_chunks.json",
            "vdb_chunks.json",
            "vdb_entities.json",
            "vdb_relationships.json",
        ]
        # Delete the files that exist.
        for file in files_to_delete:
            file_path = os.path.join(WORKING_DIR, file)
            if os.path.exists(file_path):
                os.remove(file_path)
                logger.info(f"删除的文件:: {file_path}")

    # Bind up front: if initialize_rag() raises, the finally clause must not
    # fail with UnboundLocalError and hide the original error.
    rag = None
    try:
        # Note: the default settings use NetworkX.
        rag = await initialize_rag(WORKING_DIR)

        # Walk docx_file and pick every file whose name starts with KEMU.
        for filename in os.listdir(docx_file):
            if filename.startswith(KEMU):
                file_path = os.path.join(docx_file, filename)
                # Get the content of the docx file.
                content = get_docx_content_by_pandoc(file_path)
                await rag.ainsert(content, file_paths=[filename])
                logger.info(f"Inserted content from {filename}")
    except Exception as e:
        logger.error(f"An error occurred: {e}")
    finally:
        if rag is not None:
            await rag.finalize_storages()
# Script entry point: set up logging first, then run the async ingestion job.
if __name__ == "__main__":
    configure_logging()
    asyncio.run(main())