import asyncio
import sys

import loguru

from raganything import RAGAnything, RAGAnythingConfig
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc


async def main():
    """Parse one document end-to-end with RAGAnything.

    Configures logging, wires up the text LLM (DeepSeek), a vision model and an
    embedding model (both on SiliconFlow), then runs the complete document
    processing pipeline. This module intentionally stops at parsing/indexing;
    querying the knowledge base belongs to a separate module.
    """
    # Logging: remove loguru's default sink, then log INFO and above to stderr.
    loguru.logger.remove()
    loguru.logger.add(sys.stderr, level="INFO")

    # SECURITY NOTE(review): API keys are hard-coded in source. Move them to
    # environment variables or a secrets manager before committing/sharing.
    api_key = "sk-44ae895eeb614aa1a9c6460579e322f1"
    base_url = "https://api.deepseek.com"  # Optional

    # Pipeline configuration: parse everything (images, tables, equations).
    config = RAGAnythingConfig(
        working_dir="./rag_storage",
        mineru_parse_method="auto",
        enable_image_processing=True,
        enable_table_processing=True,
        enable_equation_processing=True,
    )

    # Text LLM wrapper (DeepSeek chat).
    # history_messages defaults to None instead of a mutable [] default, which
    # would be shared across calls; it is normalized inside the body.
    def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        return openai_complete_if_cache(
            "deepseek-chat",
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages if history_messages is not None else [],
            api_key=api_key,
            base_url=base_url,
            **kwargs,
        )

    # Vision model wrapper for image processing; delegates to the plain text
    # LLM when no image data is supplied.
    def vision_model_func(
        prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
    ):
        if not image_data:
            return llm_model_func(
                prompt,
                system_prompt,
                history_messages if history_messages is not None else [],
                **kwargs,
            )

        # Build the multimodal message list explicitly. The original inserted a
        # literal None entry when system_prompt was absent, which would be sent
        # to the API as an invalid message.
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append(
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
                    },
                ],
            }
        )

        return openai_complete_if_cache(
            "GLM-4.1V-9B-Thinking",  # free vision model on SiliconFlow
            "",
            system_prompt=None,
            history_messages=[],
            messages=messages,
            api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
            # BUGFIX: base_url must be the API root (the client appends
            # "/chat/completions" itself). The original passed the full
            # endpoint path, inconsistent with the embedding call below.
            base_url="https://api.siliconflow.cn/v1",
            **kwargs,
        )

    # Embedding function: BAAI/bge-m3 on SiliconFlow (free tier).
    # embedding_dim matches that model's output dimension.
    embedding_func = EmbeddingFunc(
        embedding_dim=1024,
        max_token_size=8192,
        func=lambda texts: openai_embed(
            texts,
            model="BAAI/bge-m3",
            api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
            base_url="https://api.siliconflow.cn/v1",
        ),
    )

    # Initialize RAGAnything with the configured model functions.
    rag = RAGAnything(
        config=config,
        llm_model_func=llm_model_func,
        vision_model_func=vision_model_func,
        embedding_func=embedding_func,
    )

    # file_path = "D:\\python\\小乔证件\\黄琬乔2023蓝桥杯省赛准考证.pdf"
    file_path = "./Txt/驿来特平台安全.docx"

    # Run the full parse pipeline; results are written to output_dir.
    await rag.process_document_complete(
        file_path=file_path, output_dir="./output", parse_method="auto"
    )
    print("Processing complete.")

    # NOTE: Index creation and knowledge-base querying (rag.aquery /
    # rag.aquery_with_multimodal) are deliberately NOT done here — this module
    # only takes the document through parsing.


if __name__ == "__main__":
    asyncio.run(main())