You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

75 lines
2.4 KiB

3 weeks ago
"""
Configuration classes for RAGAnything
Contains configuration dataclasses with environment variable support
"""
from dataclasses import dataclass, field
from typing import List
from lightrag.utils import get_env_value
@dataclass
class RAGAnythingConfig:
    """Configuration class for RAGAnything with environment variable support.

    Every field falls back to ``get_env_value(<ENV_NAME>, <fallback>, <type>)``
    when no explicit value is passed to the constructor. All lookups go
    through ``default_factory`` so that the environment is consulted each time
    an instance is created rather than once when this module is imported;
    variables set after import (for example by loading a ``.env`` file later)
    are therefore honoured.

    NOTE(review): ``get_env_value`` comes from ``lightrag.utils``; it is
    presumably responsible for converting the raw environment string to the
    requested type — confirm against that helper.
    """

    # Directory Configuration
    # ---
    working_dir: str = field(
        default_factory=lambda: get_env_value("WORKING_DIR", "./rag_storage", str)
    )
    """Directory where RAG storage and cache files are stored."""

    # MinerU Parser Configuration
    # ---
    mineru_parse_method: str = field(
        default_factory=lambda: get_env_value("MINERU_PARSE_METHOD", "auto", str)
    )
    """Default parsing method for MinerU: 'auto', 'ocr', or 'txt'."""

    mineru_output_dir: str = field(
        default_factory=lambda: get_env_value("MINERU_OUTPUT_DIR", "./output", str)
    )
    """Default output directory for MinerU parsed content."""

    display_content_stats: bool = field(
        default_factory=lambda: get_env_value("DISPLAY_CONTENT_STATS", True, bool)
    )
    """Whether to display content statistics during parsing."""

    # Multimodal Processing Configuration
    # ---
    enable_image_processing: bool = field(
        default_factory=lambda: get_env_value("ENABLE_IMAGE_PROCESSING", True, bool)
    )
    """Enable image content processing."""

    enable_table_processing: bool = field(
        default_factory=lambda: get_env_value("ENABLE_TABLE_PROCESSING", True, bool)
    )
    """Enable table content processing."""

    enable_equation_processing: bool = field(
        default_factory=lambda: get_env_value(
            "ENABLE_EQUATION_PROCESSING", True, bool
        )
    )
    """Enable equation content processing."""

    # Batch Processing Configuration
    # ---
    max_concurrent_files: int = field(
        default_factory=lambda: get_env_value("MAX_CONCURRENT_FILES", 1, int)
    )
    """Maximum number of files to process concurrently."""

    supported_file_extensions: List[str] = field(
        # Trim each entry and drop empty ones so that values such as
        # ".pdf, .jpg," do not produce " .jpg" or "" in the resulting list.
        default_factory=lambda: [
            extension.strip()
            for extension in get_env_value(
                "SUPPORTED_FILE_EXTENSIONS",
                ".pdf,.jpg,.jpeg,.png,.bmp,.tiff,.tif,.gif,.webp,.doc,.docx,.ppt,.pptx,.xls,.xlsx,.txt,.md",
                str,
            ).split(",")
            if extension.strip()
        ]
    )
    """List of supported file extensions for batch processing."""

    recursive_folder_processing: bool = field(
        default_factory=lambda: get_env_value(
            "RECURSIVE_FOLDER_PROCESSING", True, bool
        )
    )
    """Whether to recursively process subfolders in batch mode."""