You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

230 lines
6.7 KiB

"""
Example of directly using modal processors
This example demonstrates how to use RAG-Anything's modal processors directly without going through MinerU.
"""
import asyncio
import argparse
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag import LightRAG
from raganything.modalprocessors import (
ImageModalProcessor,
TableModalProcessor,
EquationModalProcessor,
)
WORKING_DIR = "./rag_storage"
def get_llm_model_func(api_key: str, base_url: str = None):
return (
lambda prompt,
system_prompt=None,
history_messages=[],
**kwargs: openai_complete_if_cache(
"gpt-4o-mini",
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
api_key=api_key,
base_url=base_url,
**kwargs,
)
)
def get_vision_model_func(api_key: str, base_url: str = None):
return (
lambda prompt,
system_prompt=None,
history_messages=[],
image_data=None,
**kwargs: openai_complete_if_cache(
"gpt-4o",
"",
system_prompt=None,
history_messages=[],
messages=[
{"role": "system", "content": system_prompt} if system_prompt else None,
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{image_data}"
},
},
],
}
if image_data
else {"role": "user", "content": prompt},
],
api_key=api_key,
base_url=base_url,
**kwargs,
)
if image_data
else openai_complete_if_cache(
"gpt-4o-mini",
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
api_key=api_key,
base_url=base_url,
**kwargs,
)
)
async def process_image_example(lightrag: LightRAG, vision_model_func):
"""Example of processing an image"""
# Create image processor
image_processor = ImageModalProcessor(
lightrag=lightrag, modal_caption_func=vision_model_func
)
# Prepare image content
image_content = {
"img_path": "image.jpg",
"img_caption": ["Example image caption"],
"img_footnote": ["Example image footnote"],
}
# Process image
description, entity_info = await image_processor.process_multimodal_content(
modal_content=image_content,
content_type="image",
file_path="image_example.jpg",
entity_name="Example Image",
)
print("Image Processing Results:")
print(f"Description: {description}")
print(f"Entity Info: {entity_info}")
async def process_table_example(lightrag: LightRAG, llm_model_func):
"""Example of processing a table"""
# Create table processor
table_processor = TableModalProcessor(
lightrag=lightrag, modal_caption_func=llm_model_func
)
# Prepare table content
table_content = {
"table_body": """
| Name | Age | Occupation |
|------|-----|------------|
| John | 25 | Engineer |
| Mary | 30 | Designer |
""",
"table_caption": ["Employee Information Table"],
"table_footnote": ["Data updated as of 2024"],
}
# Process table
description, entity_info = await table_processor.process_multimodal_content(
modal_content=table_content,
content_type="table",
file_path="table_example.md",
entity_name="Employee Table",
)
print("\nTable Processing Results:")
print(f"Description: {description}")
print(f"Entity Info: {entity_info}")
async def process_equation_example(lightrag: LightRAG, llm_model_func):
"""Example of processing a mathematical equation"""
# Create equation processor
equation_processor = EquationModalProcessor(
lightrag=lightrag, modal_caption_func=llm_model_func
)
# Prepare equation content
equation_content = {"text": "E = mc^2", "text_format": "LaTeX"}
# Process equation
description, entity_info = await equation_processor.process_multimodal_content(
modal_content=equation_content,
content_type="equation",
file_path="equation_example.txt",
entity_name="Mass-Energy Equivalence",
)
print("\nEquation Processing Results:")
print(f"Description: {description}")
print(f"Entity Info: {entity_info}")
async def initialize_rag(api_key: str, base_url: str = None):
rag = LightRAG(
working_dir=WORKING_DIR,
embedding_func=EmbeddingFunc(
embedding_dim=3072,
max_token_size=8192,
func=lambda texts: openai_embed(
texts,
model="text-embedding-3-large",
api_key=api_key,
base_url=base_url,
),
),
llm_model_func=lambda prompt,
system_prompt=None,
history_messages=[],
**kwargs: openai_complete_if_cache(
"gpt-4o-mini",
prompt,
system_prompt=system_prompt,
history_messages=history_messages,
api_key=api_key,
base_url=base_url,
**kwargs,
),
)
await rag.initialize_storages()
await initialize_pipeline_status()
return rag
def main():
"""Main function to run the example"""
parser = argparse.ArgumentParser(description="Modal Processors Example")
parser.add_argument("--api-key", required=True, help="OpenAI API key")
parser.add_argument("--base-url", help="Optional base URL for API")
parser.add_argument(
"--working-dir", "-w", default=WORKING_DIR, help="Working directory path"
)
args = parser.parse_args()
# Run examples
asyncio.run(main_async(args.api_key, args.base_url))
async def main_async(api_key: str, base_url: str = None):
# Initialize LightRAG
lightrag = await initialize_rag(api_key, base_url)
# Get model functions
llm_model_func = get_llm_model_func(api_key, base_url)
vision_model_func = get_vision_model_func(api_key, base_url)
# Run examples
await process_image_example(lightrag, vision_model_func)
await process_table_example(lightrag, llm_model_func)
await process_equation_example(lightrag, llm_model_func)
if __name__ == "__main__":
main()