import asyncio
import sys

import loguru
from raganything import RAGAnything, RAGAnythingConfig
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc

from Config.Config import *


async def main():
    """Process one document end to end with RAGAnything.

    Configures loguru, builds the RAGAnything configuration together with
    the LLM, vision, and embedding callables (model names, API keys, and
    base URLs come from ``Config.Config``), then runs
    ``rag.process_document_complete`` on a fixed input file, writing the
    output under ``./output``.
    """
    # Configure logging first: drop the default handler, then emit INFO and
    # above to the console (stderr).
    loguru.logger.remove()
    loguru.logger.add(sys.stderr, level="INFO")

    # Create RAGAnything configuration
    config = RAGAnythingConfig(
        working_dir="./rag_storage",
        mineru_parse_method="auto",
        enable_image_processing=True,
        enable_table_processing=True,
        enable_equation_processing=True,
    )

    # Define LLM model function
    def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        """Forward a text-only completion request to the configured LLM."""
        # A None default replaces the original shared mutable ``[]`` default;
        # callers that omit the argument still get an empty history.
        if history_messages is None:
            history_messages = []
        return openai_complete_if_cache(
            LLM_MODEL_NAME,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=LLM_API_KEY,
            base_url=LLM_BASE_URL,
            **kwargs,
        )

    # Define vision model function for image processing
    def vision_model_func(
        prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
    ):
        """Send prompt plus image to the vision model, or fall back to the LLM.

        ``image_data`` is embedded in a ``data:image/jpeg;base64,...`` URL, so
        it is presumably a base64-encoded image string (confirm with caller).
        """
        if not image_data:
            # No image supplied: behave exactly like the text-only model.
            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)

        # Build the message list explicitly. The original list contained a
        # literal None entry whenever no system prompt was given, which is
        # not a valid chat message; the system message is now simply omitted.
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append(
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image_data}"
                        },
                    },
                ],
            }
        )

        return openai_complete_if_cache(
            VISION_MODEL_NAME,  # name of the free vision model on SiliconFlow
            "",
            system_prompt=None,
            history_messages=[],
            messages=messages,
            api_key=VISION_API_KEY,
            base_url=VISION_BASE_URL,
            **kwargs,
        )

    # Define embedding function
    embedding_func = EmbeddingFunc(
        embedding_dim=1024,  # embedding dimension of the free SiliconFlow model
        max_token_size=8192,
        func=lambda texts: openai_embed(
            texts,
            model=EMBED_MODEL_NAME,
            api_key=EMBED_API_KEY,
            base_url=EMBED_BASE_URL,
        ),
    )

    # Initialize RAGAnything
    rag = RAGAnything(
        config=config,
        llm_model_func=llm_model_func,
        vision_model_func=vision_model_func,
        embedding_func=embedding_func,
    )

    # Alternative input kept for reference:
    # file_path = "D:\python\小乔证件\黄琬乔2023蓝桥杯省赛准考证.pdf"
    file_path = "./Txt/驿来特平台安全.docx"
    await rag.process_document_complete(
        file_path=file_path, output_dir="./output", parse_method="auto"
    )
    print("Processing complete.")


if __name__ == "__main__":
    asyncio.run(main())