import asyncio

from Config.Config import NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD
from Util.DocxUtil import get_docx_content_by_pandoc
from Util.LightRagUtil import configure_logging, initialize_rag
import os

from Util.Neo4jExecutor import Neo4jExecutor

# Subject to index. Options listed by the original author: Chinese, Math, Chemistry
KEMU = 'Chemistry'

# Build the working directory and the source document path from the subject name
WORKING_DIR = "./Topic/" + KEMU
docx_file = 'static/Txt/' + KEMU + '.docx'


async def main():
    """Rebuild the index for the subject ``KEMU`` from its .docx file.

    Steps, in order:
      1. Export the Neo4j connection settings into ``os.environ``.
      2. Delete every node (and attached relationships) in the Neo4j database.
      3. Read the .docx content via ``get_docx_content_by_pandoc``.
      4. Remove the previously generated storage files under ``WORKING_DIR``.
      5. Initialize the RAG instance with ``graph_storage="Neo4JStorage"`` and
         insert the document content.

    Errors raised while initializing or inserting are printed rather than
    propagated. Storages are finalized only if initialization succeeded.
    """
    # Set the Neo4j connection parameters
    # (presumably read from the environment by the Neo4J storage backend -- TODO confirm)
    os.environ["NEO4J_URI"] = NEO4J_URI
    os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME
    os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD

    # Clear the database: DETACH DELETE removes every node together with its relationships
    executor = Neo4jExecutor.create_default()
    executor.graph.run("MATCH (n) DETACH DELETE n")
    print("清库成功")

    # Original author's note: comment out or delete the cleanup code below
    # if the previously generated files should be kept.
    files_to_delete = [
        "graph_chunk_entity_relation.graphml",
        "kv_store_doc_status.json",
        "kv_store_full_docs.json",
        "kv_store_text_chunks.json",
        "vdb_chunks.json",
        "vdb_entities.json",
        "vdb_relationships.json",
    ]

    # Read the content of the docx file
    content = get_docx_content_by_pandoc(docx_file)

    # Delete the old storage files so the index is rebuilt from scratch
    for file in files_to_delete:
        file_path = os.path.join(WORKING_DIR, file)
        if os.path.exists(file_path):
            os.remove(file_path)
            print(f"Deleting old file:: {file_path}")

    # Bind the name before the try block: if initialize_rag() raises, the
    # finally clause must not touch an unbound variable (which would raise
    # UnboundLocalError and hide the original error).
    rag = None
    try:
        # Original author's note: the default setup uses NetworkX; here
        # LightRAG is initialized with the Neo4J graph storage instead.
        rag = await initialize_rag(WORKING_DIR, graph_storage="Neo4JStorage")
        await rag.ainsert(content)
        print("\nIndexing completed successfully!")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Only finalize storages that were actually created
        if rag is not None:
            await rag.finalize_storages()


if __name__ == "__main__":
    configure_logging()
    asyncio.run(main())