|
|
import asyncio
|
|
|
|
|
|
from Config.Config import NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD
|
|
|
from Util.DocxUtil import get_docx_content_by_pandoc
|
|
|
from Util.LightRagUtil import configure_logging, initialize_rag
|
|
|
import os
|
|
|
|
|
|
from Util.Neo4jExecutor import Neo4jExecutor
|
|
|
|
|
|
# Subject to index; supported values: Chinese, Math, Chemistry.
KEMU = 'Chemistry'

# Paths derived from the subject name.
WORKING_DIR = f"./Topic/{KEMU}"            # working directory handed to initialize_rag
docx_file = f"static/Txt/{KEMU}.docx"      # source document that gets indexed
|
|
|
|
|
|
async def main():
    """Rebuild the index for the configured subject from scratch.

    Steps, in order:
      1. Copy the Neo4j connection settings from ``Config`` into the
         process environment.
      2. Delete every node (with its relationships) in the Neo4j database.
      3. Read the subject's docx file with ``get_docx_content_by_pandoc``.
      4. Remove storage files left in ``WORKING_DIR`` by a previous run.
      5. Initialise the RAG instance with ``Neo4JStorage`` as graph storage
         and insert the document content.

    Any exception raised during initialisation or insertion is printed and
    not re-raised. Storages are finalised only if initialisation succeeded.
    """
    # Export the Neo4j connection parameters.
    # NOTE(review): presumably read from the environment by Neo4JStorage — confirm.
    os.environ["NEO4J_URI"] = NEO4J_URI
    os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME
    os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD

    # Wipe the database: DETACH DELETE removes each node and its relationships.
    executor = Neo4jExecutor.create_default()
    executor.graph.run("MATCH (n) DETACH DELETE n")
    print("清库成功")

    # Storage files from a previous run that are removed before re-indexing.
    files_to_delete = [
        "graph_chunk_entity_relation.graphml",
        "kv_store_doc_status.json",
        "kv_store_full_docs.json",
        "kv_store_text_chunks.json",
        "vdb_chunks.json",
        "vdb_entities.json",
        "vdb_relationships.json",
    ]

    # Load the content of the docx file.
    content = get_docx_content_by_pandoc(docx_file)

    # Delete whichever of the old storage files actually exist.
    for file in files_to_delete:
        file_path = os.path.join(WORKING_DIR, file)
        if os.path.exists(file_path):
            os.remove(file_path)
            print(f"Deleting old file:: {file_path}")

    # Bind ``rag`` before the try block: if initialize_rag() raises, the
    # finally clause must not touch an unbound name (that would raise
    # UnboundLocalError in place of the error already reported).
    rag = None
    try:
        # Note: the graph storage is set explicitly to Neo4J here instead of
        # relying on the default.
        rag = await initialize_rag(WORKING_DIR, graph_storage="Neo4JStorage")
        await rag.ainsert(content)
        print("\nIndexing completed successfully!")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Only finalise storages that were actually initialised.
        if rag is not None:
            await rag.finalize_storages()
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: set up logging first, then run the indexing
    # coroutine to completion on a fresh event loop.
    configure_logging()
    indexing_job = main()
    asyncio.run(indexing_job)
|