You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

70 lines
2.2 KiB

3 weeks ago
import asyncio
import os
import shutil

from raganything import RAGAnything, RAGAnythingConfig

from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func

import logging

# Single source of truth for the log line layout used by both handlers below.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

# Configure root logging at program start; records at INFO and above are emitted.
logging.basicConfig(
    level=logging.INFO,  # log level: INFO
    format=_LOG_FORMAT,
)

# Finer-grained control: give the 'lightrag' logger its own level and stream handler.
# NOTE(review): if the 'lightrag' logger propagates to the root logger, its records
# will be printed by both this handler and the root handler — confirm that is intended.
logger = logging.getLogger('lightrag')
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(_LOG_FORMAT))
logger.addHandler(handler)


async def main(
    file_path: str = "static/Txt/吉林动画学院一览表.pdf",
    working_dir: str = "./Topic/DongHua",
    output_dir: str = "./output",
) -> None:
    """Parse one document and rebuild its index from scratch.

    Both ``output_dir`` and ``working_dir`` are deleted and recreated empty
    before processing, so anything previously stored in them is lost.

    Args:
        file_path: Path of the document to process.
        working_dir: Directory where the index is generated; passed to
            ``RAGAnythingConfig`` as ``working_dir``.
        output_dir: Directory handed to ``process_document_complete`` as
            ``output_dir`` (presumably the parser's intermediate output —
            confirm against the RAGAnything documentation).

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing file. The
            check runs before any directory is removed.
    """
    # Fail before anything is deleted: wiping the existing index and then
    # discovering the input is missing would leave an empty index behind.
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"Document to index not found: {file_path}")

    # Clear the output directory first, then the index directory, and recreate
    # each one empty so stale files from an earlier run cannot mix in.
    for directory in (output_dir, working_dir):
        shutil.rmtree(directory, ignore_errors=True)
        os.makedirs(directory, exist_ok=True)

    # Point the configuration at the final index directory.
    config = RAGAnythingConfig(
        working_dir=working_dir,
        mineru_parse_method="auto",
        enable_image_processing=True,  # process images
        enable_table_processing=True,  # process tables
        enable_equation_processing=True,  # process equations
    )

    # Project-specific model callables: LLM, vision model (built on the LLM
    # function), and embedding function.
    llm_model_func = create_llm_model_func()
    vision_model_func = create_vision_model_func(llm_model_func)
    embedding_func = create_embedding_func()

    rag = RAGAnything(
        config=config,
        llm_model_func=llm_model_func,
        vision_model_func=vision_model_func,
        embedding_func=embedding_func,
    )

    # Start index generation for the document.
    await rag.process_document_complete(
        file_path=file_path,
        output_dir=output_dir,
        parse_method="auto",
    )
    print("文档解析索引完成!")


if __name__ == "__main__":
    # Run the indexing coroutine to completion when executed as a script.
    asyncio.run(main())