You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
70 lines
2.2 KiB
70 lines
2.2 KiB
import asyncio
|
|
import os
|
|
import shutil
|
|
|
|
from lightrag import LightRAG
|
|
from lightrag.kg.shared_storage import initialize_pipeline_status
|
|
from lightrag.utils import setup_logger
|
|
from raganything import RAGAnything, RAGAnythingConfig
|
|
from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func
|
|
|
|
async def main():
    """Rebuild the document index for one PDF from scratch.

    Steps, in order:
      1. Verify that the source document exists.
      2. Empty and recreate the parser output directory and the index
         working directory.
      3. Build a LightRAG instance from the project's LLM and embedding
         factory functions and initialize its storages.
      4. Wrap it in RAGAnything and call ``process_document_complete`` on
         the source document.
      5. Finalize the LightRAG storages, whether or not step 4 succeeded.

    Raises:
        FileNotFoundError: If the source document is missing. This is
            checked before any directory is deleted, so a wrong path can
            never destroy a previously generated index.
    """
    # Path of the document to process.
    file_path = "static/Txt/吉林动画学院一览表.pdf"

    # Directory in which the index is generated.
    working_dir = "./Topic/DongHua"

    # Directory handed to RAGAnything as its output directory.
    output_dir = "./output"

    # Fail fast: the cleanup below is destructive, so make sure there is
    # actually something to index before throwing the old index away.
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"Source document not found: {file_path}")

    # Remove everything under the output directory, then everything under the
    # working directory, recreating each one empty.
    for directory in (output_dir, working_dir):
        shutil.rmtree(directory, ignore_errors=True)
        os.makedirs(directory, exist_ok=True)

    # Point RAGAnything at the final index directory and enable image, table
    # and equation processing.
    config = RAGAnythingConfig(
        working_dir=working_dir,
        mineru_parse_method="auto",
        enable_image_processing=True,
        enable_table_processing=True,
        enable_equation_processing=True,
    )

    # Project-specific LLM function.
    llm_model_func = create_llm_model_func()
    # Project-specific vision model function, built on top of the LLM function.
    vision_model_func = create_vision_model_func(llm_model_func)
    # Project-specific embedding function.
    embedding_func = create_embedding_func()

    # Configure the logger used by LightRAG.
    setup_logger("lightrag", level="INFO")

    lightrag_instance = LightRAG(
        working_dir=working_dir,
        llm_model_func=llm_model_func,
        embedding_func=embedding_func,
    )
    # Initialize the storage (database) connections.
    await lightrag_instance.initialize_storages()

    try:
        # Initialize the pipeline status used for document processing.
        await initialize_pipeline_status()

        rag = RAGAnything(
            config=config,
            lightrag=lightrag_instance,
            vision_model_func=vision_model_func,
        )

        await rag.process_document_complete(
            file_path=file_path,
            output_dir=output_dir,
            parse_method="auto",
        )
        print("文档解析索引完成!")
    finally:
        # Counterpart of initialize_storages(): release the storages even
        # when parsing or indexing raises.
        await lightrag_instance.finalize_storages()
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the async indexing job to completion on a
    # fresh event loop. Nothing happens when the module is merely imported.
    asyncio.run(main())
|