You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
75 lines
2.4 KiB
75 lines
2.4 KiB
"""
Configuration classes for RAGAnything

Contains configuration dataclasses with environment variable support
"""
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import List
|
|
from lightrag.utils import get_env_value
|
|
|
|
|
|
@dataclass
class RAGAnythingConfig:
    """Configuration class for RAGAnything with environment variable support.

    Each field's default is produced by ``get_env_value(NAME, fallback, type)``.
    Pass a value explicitly to the constructor to override a default.

    NOTE: fields declared with ``field(default=...)`` evaluate their
    ``get_env_value`` call exactly once, when this class body is executed
    (i.e. when the module is imported). Only ``supported_file_extensions``
    uses ``default_factory`` and is therefore re-evaluated for every new
    instance.
    """

    # Directory Configuration
    # ---
    working_dir: str = field(default=get_env_value("WORKING_DIR", "./rag_storage", str))
    """Directory where RAG storage and cache files are stored."""

    # MinerU Parser Configuration
    # ---
    mineru_parse_method: str = field(
        default=get_env_value("MINERU_PARSE_METHOD", "auto", str)
    )
    """Default parsing method for MinerU: 'auto', 'ocr', or 'txt'."""

    mineru_output_dir: str = field(
        default=get_env_value("MINERU_OUTPUT_DIR", "./output", str)
    )
    """Default output directory for MinerU parsed content."""

    display_content_stats: bool = field(
        default=get_env_value("DISPLAY_CONTENT_STATS", True, bool)
    )
    """Whether to display content statistics during parsing."""

    # Multimodal Processing Configuration
    # ---
    enable_image_processing: bool = field(
        default=get_env_value("ENABLE_IMAGE_PROCESSING", True, bool)
    )
    """Enable image content processing."""

    enable_table_processing: bool = field(
        default=get_env_value("ENABLE_TABLE_PROCESSING", True, bool)
    )
    """Enable table content processing."""

    enable_equation_processing: bool = field(
        default=get_env_value("ENABLE_EQUATION_PROCESSING", True, bool)
    )
    """Enable equation content processing."""

    # Batch Processing Configuration
    # ---
    max_concurrent_files: int = field(
        default=get_env_value("MAX_CONCURRENT_FILES", 1, int)
    )
    """Maximum number of files to process concurrently."""

    supported_file_extensions: List[str] = field(
        # The value is a comma-separated string. Trim whitespace around each
        # entry and drop empty entries so that a value such as
        # ".pdf, .docx," yields [".pdf", ".docx"] rather than
        # [".pdf", " .docx", ""], which could never match a file suffix.
        default_factory=lambda: [
            ext
            for ext in map(
                str.strip,
                get_env_value(
                    "SUPPORTED_FILE_EXTENSIONS",
                    ".pdf,.jpg,.jpeg,.png,.bmp,.tiff,.tif,.gif,.webp,.doc,.docx,.ppt,.pptx,.xls,.xlsx,.txt,.md",
                    str,
                ).split(","),
            )
            if ext
        ]
    )
    """List of supported file extensions for batch processing."""

    recursive_folder_processing: bool = field(
        default=get_env_value("RECURSIVE_FOLDER_PROCESSING", True, bool)
    )
    """Whether to recursively process subfolders in batch mode."""
|