#!/usr/bin/env python3
"""
Image Format Parsing Test Script for RAG-Anything

This script demonstrates how to parse various image formats using MinerU,
including JPG, PNG, BMP, TIFF, GIF, and WebP files.

Requirements:
- PIL/Pillow library for format conversion
- RAG-Anything package

Usage:
    python image_format_test.py --file path/to/image.bmp
    python image_format_test.py --check-pillow
"""

import argparse
import sys
from collections import Counter
from pathlib import Path

# Extensions this script accepts as input.
SUPPORTED_EXTENSIONS = frozenset(
    {
        ".jpg",
        ".jpeg",
        ".png",
        ".bmp",
        ".tiff",
        ".tif",
        ".gif",
        ".webp",
    }
)

# Extensions reported as "natively supported by MinerU"; every other supported
# extension is reported as being converted to PNG first.
MINERU_NATIVE_FORMATS = frozenset({".jpg", ".jpeg", ".png"})


def check_pillow_installation() -> bool:
    """Report whether PIL/Pillow can be imported.

    Prints either the detected version or installation instructions.

    Returns:
        True if ``PIL.Image`` imports successfully, False otherwise.
    """
    try:
        from PIL import Image
    except ImportError:
        print("❌ PIL/Pillow not found. Please install Pillow:")
        print(" pip install Pillow")
        return False

    version = getattr(Image, "__version__", "Unknown")
    print(f"✅ PIL/Pillow found: PIL version {version}")
    return True


def get_image_info(image_path: Path) -> dict:
    """Collect basic metadata about an image file using Pillow.

    Args:
        image_path: Location of the image to inspect.

    Returns:
        On success, a dict with the keys ``format``, ``mode``, ``size`` and
        ``has_transparency`` (True when the mode is ``RGBA``/``LA`` or the
        image info carries a ``transparency`` entry). On any failure
        (including Pillow being unavailable), a dict with the single key
        ``error`` holding the exception message.
    """
    try:
        from PIL import Image

        with Image.open(image_path) as img:
            return {
                "format": img.format,
                "mode": img.mode,
                "size": img.size,
                "has_transparency": img.mode in ("RGBA", "LA")
                or "transparency" in img.info,
            }
    except Exception as e:
        # Deliberately best-effort: the caller only uses this for display and
        # simply skips the info section when an error is reported.
        return {"error": str(e)}


def _preview(text: str, limit: int) -> str:
    """Return *text* stripped and cut to *limit* chars, with '...' if cut."""
    stripped = text.strip()
    if len(stripped) > limit:
        return f"{stripped[:limit]}..."
    return stripped


def _items_of_type(content_list, item_type: str) -> list:
    """Return the dict entries of *content_list* whose ``type`` is *item_type*."""
    return [
        item
        for item in content_list
        if isinstance(item, dict) and item.get("type") == item_type
    ]


def _print_file_summary(path: Path) -> None:
    """Print extension, size, image metadata and MinerU compatibility of *path*."""
    extension = path.suffix.upper()
    print(f"📸 File format: {extension}")
    print(f"📏 File size: {path.stat().st_size / 1024:.1f} KB")

    img_info = get_image_info(path)
    if "error" not in img_info:
        width, height = img_info["size"][0], img_info["size"][1]
        print("🖼️ Image info:")
        print(f" • Format: {img_info['format']}")
        print(f" • Mode: {img_info['mode']}")
        print(f" • Size: {width}x{height}")
        print(f" • Has transparency: {img_info['has_transparency']}")

    if path.suffix.lower() in MINERU_NATIVE_FORMATS:
        print(f"✅ Format {extension} is natively supported by MinerU")
    else:
        print(
            f"ℹ️ Format {extension} will be converted to PNG for MinerU compatibility"
        )


def _print_parse_report(content_list, md_content: str) -> None:
    """Print statistics and previews for the result of a parse run."""
    print("✅ Parsing successful!")
    print(f" 📊 Content blocks: {len(content_list)}")
    print(f" 📝 Markdown length: {len(md_content)} characters")

    # Tally how many blocks of each type were produced.
    content_types = Counter(
        item.get("type", "unknown")
        for item in content_list
        if isinstance(item, dict)
    )
    if content_types:
        print(" 📋 Content distribution:")
        for content_type, count in sorted(content_types.items()):
            print(f" • {content_type}: {count}")

    if md_content.strip():
        print("\n📄 Extracted text preview (first 500 characters):")
        print(f" {_preview(md_content, 500)}")
    else:
        print("\n📄 No text extracted from the image")

    image_items = _items_of_type(content_list, "image")
    if image_items:
        print(f"\n🖼️ Found {len(image_items)} processed image(s):")
        for i, item in enumerate(image_items, 1):
            print(f" {i}. Image path: {item.get('img_path', 'N/A')}")
            captions = item.get("img_caption")
            if captions:
                # The original code indexes [0], i.e. it expects a sequence of
                # captions; a bare string is shown whole rather than as its
                # first character.
                caption = captions if isinstance(captions, str) else captions[0]
                print(f" Caption: {caption}")

    text_items = _items_of_type(content_list, "text")
    if text_items:
        print("\n📝 OCR text blocks found:")
        for i, item in enumerate(text_items, 1):
            text_content = item.get("text") or ""
            if text_content.strip():
                print(f" {i}. {_preview(text_content, 200)}")

    table_items = _items_of_type(content_list, "table")
    if table_items:
        print(f"\n📊 Found {len(table_items)} table(s) in image:")
        for i in range(1, len(table_items) + 1):
            print(f" {i}. Table detected with content")


def test_image_format_parsing(file_path: str) -> bool:
    """Parse one image file through RAG-Anything and print a report.

    Validates that the file exists and has a supported extension, prints file
    and image metadata, then runs ``RAGAnything.parse_document`` with the
    ``ocr`` parse method and summarises the returned content.

    Args:
        file_path: Path to the image file to parse.

    Returns:
        True if parsing and reporting completed, False if the file is missing,
        has an unsupported extension, or parsing raised an exception.

    Raises:
        Exception: Anything raised while importing or constructing
            ``RAGAnything`` propagates to the caller; only errors from the
            parse/report phase are caught here.
    """
    print(f"🧪 Testing image format parsing: {file_path}")

    path = Path(file_path)
    if not path.exists():
        print(f"❌ File does not exist: {path}")
        return False

    if path.suffix.lower() not in SUPPORTED_EXTENSIONS:
        print(f"❌ Unsupported file format: {path.suffix}")
        # Sorted so the message is identical from run to run.
        print(f" Supported formats: {', '.join(sorted(SUPPORTED_EXTENSIONS))}")
        return False

    _print_file_summary(path)

    # Imported here rather than at module level so that --check-pillow and the
    # validation above work even when RAG-Anything is not installed.
    from raganything import RAGAnything

    # Initialize RAGAnything (only for parsing functionality)
    rag = RAGAnything(working_dir="./temp_parsing_test")

    try:
        print("\n🔄 Testing image parsing with MinerU...")
        # NOTE(review): called synchronously, as in the original script;
        # confirm parse_document is not a coroutine in the installed version.
        content_list, md_content = rag.parse_document(
            file_path=str(path),
            output_dir="./test_output",
            parse_method="ocr",  # Images use OCR method
            display_stats=True,
        )

        _print_parse_report(content_list, md_content)

        print("\n🎉 Image format parsing test completed successfully!")
        print("📁 Output files saved to: ./test_output")
        return True

    except Exception as e:
        import traceback

        print(f"\n❌ Image format parsing failed: {str(e)}")
        print(f" Full error: {traceback.format_exc()}")
        return False


# The name starts with "test_" and the script is named *_test.py, so pytest
# would otherwise collect this CLI helper and fail on the missing "file_path"
# fixture.
test_image_format_parsing.__test__ = False


def main(argv=None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]`` when None.

    Returns:
        Process exit code: 0 on success, 1 on failure or interruption.
        Invalid command lines exit through argparse with status 2.
    """
    parser = argparse.ArgumentParser(
        description="Test image format parsing with MinerU"
    )
    parser.add_argument(
        "--file",
        help="Path to the image file to test (required unless --check-pillow is given)",
    )
    parser.add_argument(
        "--check-pillow", action="store_true", help="Only check PIL/Pillow installation"
    )
    args = parser.parse_args(argv)

    # --file is validated here instead of via required=True so that
    # --check-pillow can be used on its own.
    if not args.check_pillow and not args.file:
        parser.error("the following arguments are required: --file")

    print("🔧 Checking PIL/Pillow installation...")
    if not check_pillow_installation():
        return 1

    if args.check_pillow:
        print("✅ PIL/Pillow installation check passed!")
        return 0

    try:
        success = test_image_format_parsing(args.file)
        return 0 if success else 1
    except KeyboardInterrupt:
        print("\n⏹️ Test interrupted by user")
        return 1
    except Exception as e:
        print(f"\n❌ Unexpected error: {str(e)}")
        return 1


if __name__ == "__main__":
    sys.exit(main())