|
|
import asyncio
|
|
|
import logging
|
|
|
import os
|
|
|
|
|
|
from Util.DocxUtil import get_docx_content_by_pandoc
|
|
|
from Util.LightRagUtil import initialize_rag
|
|
|
|
|
|
# Whether to wipe the existing storage files and regenerate them from scratch.
IS_CLEAR = True

# Finer-grained control over log output: a stream handler with a
# timestamp / logger-name / level / message layout on the 'lightrag' logger.
handler = logging.StreamHandler()
handler.setFormatter(
    logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
)

logger = logging.getLogger('lightrag')
logger.setLevel(logging.INFO)
logger.addHandler(handler)
|
|
|
|
|
|
|
|
|
async def main():
    """Optionally clear the working directory, then insert matching documents.

    Reads these module-level names, which must be assigned before the call:

    * ``IS_CLEAR`` - when true, the storage files listed below are removed
      from ``WORKING_DIR`` first.
    * ``WORKING_DIR`` - directory passed to ``initialize_rag``.
    * ``docx_file`` - directory whose entries are scanned for input files.
    * ``KEMU`` - filename prefix; only entries starting with it are inserted.

    Exceptions raised while initializing or inserting are logged rather than
    propagated. Storages are finalized whenever initialization succeeded.
    """
    # Clear files.
    if IS_CLEAR:
        # Comment out or delete this cleanup section to keep existing data.
        files_to_delete = [
            "graph_chunk_entity_relation.graphml",
            "kv_store_doc_status.json",
            "kv_store_full_docs.json",
            "kv_store_text_chunks.json",
            "vdb_chunks.json",
            "vdb_entities.json",
            "vdb_relationships.json",
        ]

        # Delete each file that is present in the working directory.
        for file in files_to_delete:
            file_path = os.path.join(WORKING_DIR, file)
            if os.path.exists(file_path):
                os.remove(file_path)
                logger.info(f"删除的文件:: {file_path}")

    # Bind the name before the try block: if initialize_rag() raises, the
    # finally clause must not fail with UnboundLocalError on `rag`.
    rag = None
    try:
        # Note (carried over from the original author): the default setup
        # uses NetworkX.
        rag = await initialize_rag(WORKING_DIR)

        # Walk the docx_file directory and pick every entry starting with KEMU.
        for filename in os.listdir(docx_file):
            if not filename.startswith(KEMU):
                continue
            file_path = os.path.join(docx_file, filename)
            # Get the content of the docx file.
            content = get_docx_content_by_pandoc(file_path)
            await rag.ainsert(content, file_paths=[filename])
            logger.info(f"Inserted content from {filename}")
    except Exception as e:
        # logger.exception keeps the same message and also records the traceback.
        logger.exception(f"An error occurred: {e}")
    finally:
        # Only finalize when initialization actually produced an instance.
        if rag is not None:
            await rag.finalize_storages()
|
|
|
|
|
|
# Alternative subject list kept from the original for reference:
# KEMUS = ['JiHe', 'Math', 'SuShi', 'Chemistry', 'ShiJi', 'ChangChun']
KEMUS = ['ShiJi']

for KEMU in KEMUS:
    # Assemble the paths that main() reads from module globals:
    # the per-subject working directory and the input document directory.
    WORKING_DIR = f"./Topic/{KEMU}"
    docx_file = 'static/Txt/'
    asyncio.run(main())
|
|
|
|
|
|
|
|
|
|
|
|
|