"""
|
|
Query functionality for RAGAnything
|
|
|
|
Contains all query-related methods for both text and multimodal queries
|
|
"""
|
|
|
|
from pathlib import Path
from typing import Any, Dict, List, Optional

from lightrag import QueryParam
from lightrag.utils import always_get_an_event_loop

from raganything.prompt import PROMPTS
from raganything.utils import get_processor_for_type
|
|
|
|
|
|
class QueryMixin:
    """QueryMixin class containing query functionality for RAGAnything"""

    async def aquery(self, query: str, mode: str = "hybrid", **kwargs) -> str:
        """
        Pure text query - directly calls LightRAG's query functionality

        Args:
            query: Query text
            mode: Query mode ("local", "global", "hybrid", "naive", "mix", "bypass")
            **kwargs: Other query parameters, will be passed to QueryParam

        Returns:
            str: Query result

        Raises:
            ValueError: If no LightRAG instance is available on this object
        """
        if self.lightrag is None:
            raise ValueError(
                "No LightRAG instance available. Please process documents first or provide a pre-initialized LightRAG instance."
            )

        # Create query parameters
        query_param = QueryParam(mode=mode, **kwargs)

        self.logger.info(f"Executing text query: {query[:100]}...")
        self.logger.info(f"Query mode: {mode}")

        # Call LightRAG's query method
        result = await self.lightrag.aquery(query, param=query_param)

        self.logger.info("Text query completed")
        return result

    async def aquery_with_multimodal(
        self,
        query: str,
        multimodal_content: Optional[List[Dict[str, Any]]] = None,
        mode: str = "hybrid",
        **kwargs,
    ) -> str:
        """
        Multimodal query - combines text and multimodal content for querying

        Args:
            query: Base query text
            multimodal_content: List of multimodal content, each element contains:
                - type: Content type ("image", "table", "equation", etc.)
                - Other fields depend on type (e.g., img_path, table_data, latex, etc.)
            mode: Query mode ("local", "global", "hybrid", "naive", "mix", "bypass")
            **kwargs: Other query parameters, will be passed to QueryParam

        Returns:
            str: Query result

        Examples:
            # Pure text query
            result = await rag.query_with_multimodal("What is machine learning?")

            # Image query
            result = await rag.query_with_multimodal(
                "Analyze the content in this image",
                multimodal_content=[{
                    "type": "image",
                    "img_path": "./image.jpg"
                }]
            )

            # Table query
            result = await rag.query_with_multimodal(
                "Analyze the data trends in this table",
                multimodal_content=[{
                    "type": "table",
                    "table_data": "Name,Age\nAlice,25\nBob,30"
                }]
            )
        """
        # Ensure LightRAG is initialized
        await self._ensure_lightrag_initialized()

        self.logger.info(f"Executing multimodal query: {query[:100]}...")
        self.logger.info(f"Query mode: {mode}")

        # If no multimodal content, fallback to pure text query
        if not multimodal_content:
            self.logger.info("No multimodal content provided, executing text query")
            return await self.aquery(query, mode=mode, **kwargs)

        # Process multimodal content to generate enhanced query text
        enhanced_query = await self._process_multimodal_query_content(
            query, multimodal_content
        )

        # Create query parameters
        query_param = QueryParam(mode=mode, **kwargs)

        self.logger.info(
            f"Generated enhanced query length: {len(enhanced_query)} characters"
        )

        # Execute enhanced query
        result = await self.lightrag.aquery(enhanced_query, param=query_param)

        self.logger.info("Multimodal query completed")
        return result

    async def _process_multimodal_query_content(
        self, base_query: str, multimodal_content: List[Dict[str, Any]]
    ) -> str:
        """
        Process multimodal query content to generate enhanced query text

        Args:
            base_query: Base query text
            multimodal_content: List of multimodal content

        Returns:
            str: Enhanced query text
        """
        self.logger.info("Starting multimodal query content processing...")

        enhanced_parts = [f"User query: {base_query}"]

        for i, content in enumerate(multimodal_content):
            content_type = content.get("type", "unknown")
            self.logger.info(
                f"Processing {i+1}/{len(multimodal_content)} multimodal content: {content_type}"
            )

            try:
                # Get appropriate processor
                processor = get_processor_for_type(self.modal_processors, content_type)

                if processor:
                    # Generate content description
                    description = await self._generate_query_content_description(
                        processor, content, content_type
                    )
                    enhanced_parts.append(
                        f"\nRelated {content_type} content: {description}"
                    )
                else:
                    # If no appropriate processor, use basic description
                    basic_desc = str(content)[:200]
                    enhanced_parts.append(
                        f"\nRelated {content_type} content: {basic_desc}"
                    )

            except Exception as e:
                self.logger.error(f"Error processing multimodal content: {str(e)}")
                # Continue processing other content
                continue

        enhanced_query = "\n".join(enhanced_parts)
        enhanced_query += PROMPTS["QUERY_ENHANCEMENT_SUFFIX"]

        self.logger.info("Multimodal query content processing completed")
        return enhanced_query

    async def _generate_query_content_description(
        self, processor, content: Dict[str, Any], content_type: str
    ) -> str:
        """
        Generate content description for query

        Args:
            processor: Multimodal processor
            content: Content data
            content_type: Content type

        Returns:
            str: Content description
        """
        try:
            if content_type == "image":
                return await self._describe_image_for_query(processor, content)
            elif content_type == "table":
                return await self._describe_table_for_query(processor, content)
            elif content_type == "equation":
                return await self._describe_equation_for_query(processor, content)
            else:
                return await self._describe_generic_for_query(
                    processor, content, content_type
                )

        except Exception as e:
            self.logger.error(f"Error generating {content_type} description: {str(e)}")
            # Fall back to a truncated raw dump so the query can still proceed
            return f"{content_type} content: {str(content)[:100]}"

    async def _describe_image_for_query(
        self, processor, content: Dict[str, Any]
    ) -> str:
        """Generate image description for query"""
        image_path = content.get("img_path")
        captions = content.get("img_caption", [])
        footnotes = content.get("img_footnote", [])

        if image_path and Path(image_path).exists():
            # If image exists, use vision model to generate description
            image_base64 = processor._encode_image_to_base64(image_path)
            if image_base64:
                prompt = PROMPTS["QUERY_IMAGE_DESCRIPTION"]
                description = await processor.modal_caption_func(
                    prompt,
                    image_data=image_base64,
                    system_prompt=PROMPTS["QUERY_IMAGE_ANALYST_SYSTEM"],
                )
                return description

        # If image doesn't exist or processing failed, use existing information
        parts = []
        if image_path:
            parts.append(f"Image path: {image_path}")
        if captions:
            parts.append(f"Image captions: {', '.join(captions)}")
        if footnotes:
            parts.append(f"Image footnotes: {', '.join(footnotes)}")

        return "; ".join(parts) if parts else "Image content information incomplete"

    async def _describe_table_for_query(
        self, processor, content: Dict[str, Any]
    ) -> str:
        """Generate table description for query"""
        table_data = content.get("table_data", "")
        table_caption = content.get("table_caption", "")

        prompt = PROMPTS["QUERY_TABLE_ANALYSIS"].format(
            table_data=table_data, table_caption=table_caption
        )

        description = await processor.modal_caption_func(
            prompt, system_prompt=PROMPTS["QUERY_TABLE_ANALYST_SYSTEM"]
        )

        return description

    async def _describe_equation_for_query(
        self, processor, content: Dict[str, Any]
    ) -> str:
        """Generate equation description for query"""
        latex = content.get("latex", "")
        equation_caption = content.get("equation_caption", "")

        prompt = PROMPTS["QUERY_EQUATION_ANALYSIS"].format(
            latex=latex, equation_caption=equation_caption
        )

        description = await processor.modal_caption_func(
            prompt, system_prompt=PROMPTS["QUERY_EQUATION_ANALYST_SYSTEM"]
        )

        return description

    async def _describe_generic_for_query(
        self, processor, content: Dict[str, Any], content_type: str
    ) -> str:
        """Generate generic content description for query"""
        content_str = str(content)

        prompt = PROMPTS["QUERY_GENERIC_ANALYSIS"].format(
            content_type=content_type, content_str=content_str
        )

        description = await processor.modal_caption_func(
            prompt,
            system_prompt=PROMPTS["QUERY_GENERIC_ANALYST_SYSTEM"].format(
                content_type=content_type
            ),
        )

        return description

    # Synchronous versions of query methods
    def query(self, query: str, mode: str = "hybrid", **kwargs) -> str:
        """
        Synchronous version of pure text query

        Args:
            query: Query text
            mode: Query mode ("local", "global", "hybrid", "naive", "mix", "bypass")
            **kwargs: Other query parameters, will be passed to QueryParam

        Returns:
            str: Query result
        """
        loop = always_get_an_event_loop()
        return loop.run_until_complete(self.aquery(query, mode=mode, **kwargs))

    def query_with_multimodal(
        self,
        query: str,
        multimodal_content: Optional[List[Dict[str, Any]]] = None,
        mode: str = "hybrid",
        **kwargs,
    ) -> str:
        """
        Synchronous version of multimodal query

        Args:
            query: Base query text
            multimodal_content: List of multimodal content, each element contains:
                - type: Content type ("image", "table", "equation", etc.)
                - Other fields depend on type (e.g., img_path, table_data, latex, etc.)
            mode: Query mode ("local", "global", "hybrid", "naive", "mix", "bypass")
            **kwargs: Other query parameters, will be passed to QueryParam

        Returns:
            str: Query result
        """
        loop = always_get_an_event_loop()
        return loop.run_until_complete(
            self.aquery_with_multimodal(query, multimodal_content, mode=mode, **kwargs)
        )