import asyncio
import logging
import os

from Util.DocxUtil import get_docx_content_by_pandoc
from Util.LightRagUtil import configure_logging, initialize_rag

# Subject key: used both as the working-directory name and as the filename
# prefix that selects which input documents are ingested.
# Other values noted by the author: JiHe, Math, SuShi, Chemistry, ShiJi, ChangChun
KEMU = 'ChuZhongShuXue'

# Assemble the file paths.
WORKING_DIR = "./Topic/" + KEMU
docx_file = 'static/Txt/'

# Whether to delete the existing storage files and regenerate from scratch.
IS_CLEAR = False

# Finer-grained control of the log output.
logger = logging.getLogger('lightrag')
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
logger.addHandler(handler)


def _clear_storage_files():
    """Delete the known storage files from WORKING_DIR, logging each removal.

    Files that do not exist are skipped silently.
    """
    # Presumably the files the RAG instance persists in its working
    # directory -- verify against the storage backends actually configured.
    files_to_delete = [
        "graph_chunk_entity_relation.graphml",
        "kv_store_doc_status.json",
        "kv_store_full_docs.json",
        "kv_store_text_chunks.json",
        "vdb_chunks.json",
        "vdb_entities.json",
        "vdb_relationships.json",
    ]

    for file in files_to_delete:
        file_path = os.path.join(WORKING_DIR, file)
        if os.path.exists(file_path):
            os.remove(file_path)
            logger.info(f"删除的文件:: {file_path}")


async def main():
    """Insert every document in ``docx_file`` whose name starts with KEMU.

    When IS_CLEAR is true, the existing storage files in WORKING_DIR are
    removed first. Any exception raised during initialization or insertion
    is logged (with traceback) and swallowed; storages are finalized
    afterwards if the RAG instance was created.
    """
    if IS_CLEAR:
        _clear_storage_files()

    # Bind up front so the ``finally`` clause can tell whether initialization
    # succeeded; otherwise a failure in initialize_rag would surface as an
    # UnboundLocalError that masks the real error.
    rag = None
    try:
        # Author's note: the default setup uses NetworkX (configured inside
        # initialize_rag, which is not visible here).
        rag = await initialize_rag(WORKING_DIR)

        # Walk the input directory and ingest every file named after KEMU.
        for filename in os.listdir(docx_file):
            if not filename.startswith(KEMU):
                continue
            file_path = os.path.join(docx_file, filename)
            # Extract the document's content via the pandoc-based helper.
            content = get_docx_content_by_pandoc(file_path)
            await rag.ainsert(content, file_paths=[filename])
            logger.info(f"Inserted content from {filename}")
    except Exception as e:
        # Best-effort script: log with traceback instead of propagating.
        logger.exception(f"An error occurred: {e}")
    finally:
        if rag is not None:
            await rag.finalize_storages()


if __name__ == "__main__":
    configure_logging()
    asyncio.run(main())