You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

51 lines
1.7 KiB

3 weeks ago
import asyncio
from raganything import RAGAnything, RAGAnythingConfig
3 weeks ago
from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func
3 weeks ago
async def main(
    file_path: str = "static/Txt/化学方程式_CHEMISTRY_1.docx",
    working_dir: str = "./Topic/Chemistry",
    output_dir: str = "./output",
) -> None:
    """Parse one document with RAGAnything and build its index.

    The previous revision carried unresolved merge-conflict markers and a
    second ``file_path`` assignment that silently replaced the chemistry
    document with an unrelated one. The conflict is resolved here in favour
    of the single path chosen up front, which matches the Chemistry working
    directory.

    Args:
        file_path: Path of the document to process.
        working_dir: Directory in which the index is generated.
        output_dir: Directory handed to the parser as its output location.
    """
    # Point the index at the target directory and enable image, table and
    # equation processing.
    config = RAGAnythingConfig(
        working_dir=working_dir,
        mineru_parse_method="auto",
        enable_image_processing=True,
        enable_table_processing=True,
        enable_equation_processing=True,
    )

    # Project-specific model callables; the vision function is built on top
    # of the LLM function.
    llm_model_func = create_llm_model_func()
    vision_model_func = create_vision_model_func(llm_model_func)
    embedding_func = create_embedding_func()

    rag = RAGAnything(
        config=config,
        llm_model_func=llm_model_func,
        vision_model_func=vision_model_func,
        embedding_func=embedding_func,
    )

    await rag.process_document_complete(
        file_path=file_path,
        output_dir=output_dir,
        parse_method="auto",
    )
    print("文档解析索引完成!")
3 weeks ago
3 weeks ago
if __name__ == "__main__":
    # Run the async entry point only when executed as a script, so importing
    # this module never starts document processing.
    asyncio.run(main())