You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

75 lines
2.4 KiB

"""
Configuration classes for RAGAnything
Contains configuration dataclasses with environment variable support
"""
from dataclasses import dataclass, field
from typing import List
from lightrag.utils import get_env_value
@dataclass
class RAGAnythingConfig:
    """Configuration class for RAGAnything with environment variable support.

    Each field obtains its default through
    ``get_env_value(ENV_NAME, fallback, type)``; any field can still be
    overridden by passing it explicitly to the constructor.

    Note:
        Fields declared with ``field(default=...)`` have their default
        computed once, when this class body is executed.  Only
        ``supported_file_extensions`` uses ``default_factory`` and is
        therefore evaluated again for every new instance.
    """

    # Directory Configuration
    # ---
    working_dir: str = field(default=get_env_value("WORKING_DIR", "./rag_storage", str))
    """Directory where RAG storage and cache files are stored."""

    # MinerU Parser Configuration
    # ---
    mineru_parse_method: str = field(
        default=get_env_value("MINERU_PARSE_METHOD", "auto", str)
    )
    """Default parsing method for MinerU: 'auto', 'ocr', or 'txt'."""

    mineru_output_dir: str = field(
        default=get_env_value("MINERU_OUTPUT_DIR", "./output", str)
    )
    """Default output directory for MinerU parsed content."""

    display_content_stats: bool = field(
        default=get_env_value("DISPLAY_CONTENT_STATS", True, bool)
    )
    """Whether to display content statistics during parsing."""

    # Multimodal Processing Configuration
    # ---
    enable_image_processing: bool = field(
        default=get_env_value("ENABLE_IMAGE_PROCESSING", True, bool)
    )
    """Enable image content processing."""

    enable_table_processing: bool = field(
        default=get_env_value("ENABLE_TABLE_PROCESSING", True, bool)
    )
    """Enable table content processing."""

    enable_equation_processing: bool = field(
        default=get_env_value("ENABLE_EQUATION_PROCESSING", True, bool)
    )
    """Enable equation content processing."""

    # Batch Processing Configuration
    # ---
    max_concurrent_files: int = field(
        default=get_env_value("MAX_CONCURRENT_FILES", 1, int)
    )
    """Maximum number of files to process concurrently."""

    # The comma-separated value is split into a list.  Entries are stripped
    # and empty ones dropped so that inputs such as ".pdf, .jpg," do not
    # produce " .jpg" or "" items that can never match a file suffix.
    supported_file_extensions: List[str] = field(
        default_factory=lambda: [
            ext.strip()
            for ext in get_env_value(
                "SUPPORTED_FILE_EXTENSIONS",
                ".pdf,.jpg,.jpeg,.png,.bmp,.tiff,.tif,.gif,.webp,.doc,.docx,.ppt,.pptx,.xls,.xlsx,.txt,.md",
                str,
            ).split(",")
            if ext.strip()
        ]
    )
    """List of supported file extensions for batch processing."""

    recursive_folder_processing: bool = field(
        default=get_env_value("RECURSIVE_FOLDER_PROCESSING", True, bool)
    )
    """Whether to recursively process subfolders in batch mode."""