# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
#
# 54 lines
# 1.4 KiB
#
# 2 weeks ago
import asyncio
# 2 weeks ago
from Util.DocxUtil import get_docx_content_by_pandoc
# 2 weeks ago
from Util.LightRagUtil import configure_logging, initialize_rag
import os
# 2 weeks ago
# 化学
#WORKING_DIR = "./Topic/Chemistry"
#docx_file = 'static/Txt/化学方程式.docx'
# 苏轼
#WORKING_DIR = "./Topic/Chinese"
#docx_file = 'static/Txt/苏轼.docx'
# 数学
WORKING_DIR = "./Topic/Math"
docx_file = 'static/Txt/小学数学教学中的若干问题.docx'
# 2 weeks ago
async def main():
# 注释掉或删除以下清理代码
files_to_delete = [
"graph_chunk_entity_relation.graphml",
"kv_store_doc_status.json",
"kv_store_full_docs.json",
"kv_store_text_chunks.json",
"vdb_chunks.json",
"vdb_entities.json",
"vdb_relationships.json",
]
2 weeks ago
2 weeks ago
# 获取docx文件的内容
content = get_docx_content_by_pandoc(docx_file)
# 删除文件
2 weeks ago
for file in files_to_delete:
file_path = os.path.join(WORKING_DIR, file)
if os.path.exists(file_path):
os.remove(file_path)
print(f"Deleting old file:: {file_path}")
try:
rag = await initialize_rag(WORKING_DIR)
2 weeks ago
await rag.ainsert(content)
2 weeks ago
print("\nIndexing completed successfully!")
except Exception as e:
print(f"An error occurred: {e}")
finally:
await rag.finalize_storages()
if __name__ == "__main__":
configure_logging()
asyncio.run(main())