You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

54 lines
1.6 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import asyncio
from Util.DocxUtil import get_docx_content_by_pandoc
from Util.LightRagUtil import configure_logging, initialize_rag
import os
# Subject key selecting which documents are loaded.
# Values listed by the author: JiHe, Math, SuShi, Chemistry, ShiJi, ChangChun
KEMU = 'ShiJi'
# Directory that main() scans for source documents whose names start with KEMU
docx_file = 'static/Txt/'
# Per-subject working directory passed to initialize_rag()
WORKING_DIR = f"./Topic/{KEMU}"
async def main():
    """Rebuild the RAG storage for the subject selected by ``KEMU``.

    Steps, in order:

    1. Delete the previously generated storage files (graph, key-value
       stores, vector stores) from ``WORKING_DIR`` if they exist.
    2. Initialize a RAG instance on ``WORKING_DIR``.
    3. For every entry in ``docx_file`` whose name starts with ``KEMU``,
       extract its text with ``get_docx_content_by_pandoc`` and insert it.

    Any exception raised during steps 2-3 is printed and not re-raised.
    Storages are finalized only when the RAG instance was actually created.
    """
    # Comment out or remove this cleanup step to keep previously built data.
    stale_files = [
        "graph_chunk_entity_relation.graphml",
        "kv_store_doc_status.json",
        "kv_store_full_docs.json",
        "kv_store_text_chunks.json",
        "vdb_chunks.json",
        "vdb_entities.json",
        "vdb_relationships.json",
    ]
    # Remove old storage files
    for stale_name in stale_files:
        stale_path = os.path.join(WORKING_DIR, stale_name)
        if os.path.exists(stale_path):
            os.remove(stale_path)
            print(f"Deleting old file:: {stale_path}")

    # Bind the name before the try block: if initialize_rag() raises, the
    # finally clause must not hit an unbound local and mask the real error.
    rag = None
    try:
        # Note (from the original author): the default settings use NetworkX
        rag = await initialize_rag(WORKING_DIR)
        # Walk docx_file and pick every entry whose name starts with KEMU
        for filename in os.listdir(docx_file):
            if not filename.startswith(KEMU):
                continue
            source_path = os.path.join(docx_file, filename)
            # Extract the document's text content
            content = get_docx_content_by_pandoc(source_path)
            await rag.ainsert(content, file_paths=[filename])
            print(f"Inserted content from {filename}")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Only finalize when initialization succeeded.
        if rag is not None:
            await rag.finalize_storages()
if __name__ == "__main__":
    # Script entry point: configure logging first, then run main() to
    # completion on a new event loop.
    configure_logging()
    rebuild_job = main()
    asyncio.run(rebuild_job)