"""End-to-end RAGAnything example: index one document, then query it.

The script wires OpenAI-backed text, vision and embedding callables into
``RAGAnything``, processes a single PDF, and runs one plain-text query and
one multimodal (equation) query against the result.
"""
import asyncio

from raganything import RAGAnything
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc


async def main():
    """Build the RAG pipeline, process a document and print two query results."""
    # Credentials/endpoint shared by every model callable below. Replace the
    # placeholder with a real key before running.
    api_key = "your-api-key"
    # Optional custom endpoint; None leaves the choice to the client library.
    base_url = None

    def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        """Return the awaitable for a text-only completion with gpt-4o-mini."""
        # Avoid a shared mutable default: normalise None to a fresh list.
        if history_messages is None:
            history_messages = []
        return openai_complete_if_cache(
            "gpt-4o-mini",
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=api_key,
            **kwargs,
        )

    def vision_model_func(
        prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
    ):
        """Return the awaitable for a completion, using gpt-4o when an image is given.

        ``image_data`` is interpolated into a ``data:image/jpeg;base64,...`` URL.
        Without an image the call is delegated to ``llm_model_func``.
        """
        if not image_data:
            return llm_model_func(
                prompt,
                system_prompt=system_prompt,
                history_messages=history_messages,
                **kwargs,
            )

        # Only include a system message when one was actually supplied, so the
        # message list never contains a None entry.
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append(
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image_data}"
                        },
                    },
                ],
            }
        )
        # The prompt/system/history arguments are left empty here because the
        # full conversation is passed explicitly through ``messages``.
        return openai_complete_if_cache(
            "gpt-4o",
            "",
            system_prompt=None,
            history_messages=[],
            messages=messages,
            api_key=api_key,
            **kwargs,
        )

    def embed_texts(texts):
        """Return the awaitable that embeds ``texts`` with text-embedding-3-large."""
        return openai_embed(
            texts,
            model="text-embedding-3-large",
            api_key=api_key,
            base_url=base_url,
        )

    # Initialize RAGAnything
    rag = RAGAnything(
        working_dir="./rag_storage",
        llm_model_func=llm_model_func,
        vision_model_func=vision_model_func,
        embedding_func=EmbeddingFunc(
            embedding_dim=3072,
            max_token_size=8192,
            func=embed_texts,
        ),
    )

    # Process a document
    await rag.process_document_complete(
        file_path="path/to/your/document.pdf",
        output_dir="./output",
        parse_method="auto",
    )

    # Query the processed content
    # Pure text query - for basic knowledge base search
    text_result = await rag.aquery(
        "What are the main findings shown in the figures and tables?",
        mode="hybrid",
    )
    print("Text query result:", text_result)

    # Multimodal query with specific multimodal content
    multimodal_result = await rag.aquery_with_multimodal(
        "Explain this formula and its relevance to the document content",
        multimodal_content=[
            {
                "type": "equation",
                "latex": "P(d|q) = \\frac{P(q|d) \\cdot P(d)}{P(q)}",
                "equation_caption": "Document relevance probability",
            }
        ],
        mode="hybrid",
    )
    print("Multimodal query result:", multimodal_result)


if __name__ == "__main__":
    asyncio.run(main())