You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

229 lines
7.4 KiB

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env python3
"""
Image Format Parsing Test Script for RAG-Anything
This script demonstrates how to parse various image formats
using MinerU, including JPG, PNG, BMP, TIFF, GIF, and WebP files.
Requirements:
- PIL/Pillow library for format conversion
- RAG-Anything package
Usage:
python image_format_test.py --file path/to/image.bmp
"""
import argparse
import sys
from pathlib import Path
from raganything import RAGAnything
def check_pillow_installation():
    """Report whether PIL/Pillow can be imported.

    Prints the detected version on success, or installation instructions
    when the import fails.

    Returns:
        bool: True if ``PIL.Image`` imported, False on ``ImportError``.
    """
    try:
        from PIL import Image
    except ImportError:
        print("❌ PIL/Pillow not found. Please install Pillow:")
        print(" pip install Pillow")
        return False

    # Fall back to a placeholder when the module exposes no version attribute.
    pil_version = getattr(Image, "__version__", "Unknown")
    print(f"✅ PIL/Pillow found: PIL version {pil_version}")
    return True
def get_image_info(image_path: Path):
    """Collect basic metadata about an image file via Pillow.

    Args:
        image_path: Location of the image to inspect.

    Returns:
        dict: On success, the keys ``format``, ``mode``, ``size`` and
        ``has_transparency``. On any failure (Pillow missing, file cannot
        be opened, ...) a single-key dict ``{"error": <message>}``.
    """
    try:
        from PIL import Image

        with Image.open(image_path) as img:
            mode = img.mode
            # Either the mode itself is one of the alpha modes listed here,
            # or the image metadata declares a "transparency" entry.
            transparent = mode in ("RGBA", "LA") or "transparency" in img.info
            return {
                "format": img.format,
                "mode": mode,
                "size": img.size,
                "has_transparency": transparent,
            }
    except Exception as exc:
        # Deliberately best-effort: callers only check for the "error" key.
        return {"error": str(exc)}
def _items_of_type(content_list, block_type):
    """Return the dict entries of *content_list* whose ``type`` is *block_type*."""
    return [
        item
        for item in content_list
        if isinstance(item, dict) and item.get("type") == block_type
    ]


def _preview(text, limit):
    """Strip *text*, cut it to *limit* characters, and add ``...`` only if cut."""
    stripped = text.strip()
    return stripped[:limit] + ("..." if len(stripped) > limit else "")


def _first_caption(caption):
    """Return the first caption of a list/tuple, or the caption itself otherwise."""
    if isinstance(caption, (list, tuple)):
        return caption[0]
    return caption


def test_image_format_parsing(file_path: str) -> bool:
    """Parse a single image with RAGAnything and print a report of the result.

    The function validates the path and extension, prints file and image
    metadata, calls ``RAGAnything.parse_document`` with the OCR method, and
    then summarises the returned content blocks (types, text preview, images,
    OCR text blocks, tables).

    Args:
        file_path: Path to the image file to parse.

    Returns:
        bool: True when parsing and reporting finished without an exception;
        False when the file is missing, has an unsupported extension, or
        parsing raised.
    """
    from collections import Counter

    print(f"🧪 Testing image format parsing: {file_path}")

    # Check if file exists and is a supported image format
    image_path = Path(file_path)
    if not image_path.exists():
        print(f"❌ File does not exist: {image_path}")
        return False

    supported_extensions = {
        ".jpg",
        ".jpeg",
        ".png",
        ".bmp",
        ".tiff",
        ".tif",
        ".gif",
        ".webp",
    }
    suffix = image_path.suffix.lower()
    if suffix not in supported_extensions:
        print(f"❌ Unsupported file format: {image_path.suffix}")
        # Sorted so the message is identical from run to run (set order is not).
        print(f" Supported formats: {', '.join(sorted(supported_extensions))}")
        return False

    print(f"📸 File format: {image_path.suffix.upper()}")
    print(f"📏 File size: {image_path.stat().st_size / 1024:.1f} KB")

    # Get detailed image information
    img_info = get_image_info(image_path)
    if "error" not in img_info:
        print("🖼️ Image info:")
        print(f" • Format: {img_info['format']}")
        print(f" • Mode: {img_info['mode']}")
        print(f" • Size: {img_info['size'][0]}x{img_info['size'][1]}")
        print(f" • Has transparency: {img_info['has_transparency']}")
    else:
        # Surface the reason instead of silently omitting the info section.
        print(f"⚠️ Could not read image info: {img_info['error']}")

    # Check format compatibility with MinerU
    mineru_native_formats = {".jpg", ".jpeg", ".png"}
    if suffix not in mineru_native_formats:
        print(
            f" Format {image_path.suffix.upper()} will be converted to PNG for MinerU compatibility"
        )
    else:
        print(f"✅ Format {image_path.suffix.upper()} is natively supported by MinerU")

    # Initialize RAGAnything (only for parsing functionality)
    rag = RAGAnything(working_dir="./temp_parsing_test")
    output_dir = "./test_output"

    try:
        # Test image parsing with MinerU
        print("\n🔄 Testing image parsing with MinerU...")
        content_list, md_content = rag.parse_document(
            file_path=str(image_path),
            output_dir=output_dir,
            parse_method="ocr",  # Images use OCR method
            display_stats=True,
        )
        print("✅ Parsing successful!")
        print(f" 📊 Content blocks: {len(content_list)}")
        print(f" 📝 Markdown length: {len(md_content)} characters")

        # Analyze content types
        content_types = Counter(
            item.get("type", "unknown")
            for item in content_list
            if isinstance(item, dict)
        )
        if content_types:
            print(" 📋 Content distribution:")
            for content_type, count in sorted(content_types.items()):
                print(f"{content_type}: {count}")

        # Display extracted text (if any)
        if md_content.strip():
            print("\n📄 Extracted text preview (first 500 characters):")
            # The ellipsis is decided on the stripped text, i.e. on what is
            # actually being previewed, not on surrounding whitespace.
            print(f" {_preview(md_content, 500)}")
        else:
            print("\n📄 No text extracted from the image")

        # Display image processing results
        image_items = _items_of_type(content_list, "image")
        if image_items:
            print(f"\n🖼️ Found {len(image_items)} processed image(s):")
            for i, item in enumerate(image_items, 1):
                print(f" {i}. Image path: {item.get('img_path', 'N/A')}")
                caption = item.get("img_caption")
                if caption:
                    print(f" Caption: {_first_caption(caption)}")

        # Display text blocks (OCR results)
        text_items = _items_of_type(content_list, "text")
        if text_items:
            print("\n📝 OCR text blocks found:")
            for i, item in enumerate(text_items, 1):
                # Tolerate a missing/None/non-string "text" field rather than
                # reporting a successful parse as a failure.
                text_content = str(item.get("text") or "")
                if text_content.strip():
                    print(f" {i}. {_preview(text_content, 200)}")

        # Check for any tables detected in the image
        table_items = _items_of_type(content_list, "table")
        if table_items:
            print(f"\n📊 Found {len(table_items)} table(s) in image:")
            for i in range(1, len(table_items) + 1):
                print(f" {i}. Table detected with content")

        print("\n🎉 Image format parsing test completed successfully!")
        print(f"📁 Output files saved to: {output_dir}")
        return True
    except Exception as e:
        # Top-level boundary of the test run: report with full traceback.
        print(f"\n❌ Image format parsing failed: {str(e)}")
        import traceback

        print(f" Full error: {traceback.format_exc()}")
        return False
def main():
    """Command-line entry point for the image format parsing test.

    Parses ``--file`` and ``--check-pillow``, verifies that PIL/Pillow is
    importable, and then either stops (``--check-pillow``) or runs
    ``test_image_format_parsing`` on the given file.

    Returns:
        int: 0 on success; 1 if Pillow is missing, the test fails, the user
        interrupts, or an unexpected exception is raised. A missing
        ``--file`` (without ``--check-pillow``) exits with argparse's usual
        usage error.
    """
    parser = argparse.ArgumentParser(
        description="Test image format parsing with MinerU"
    )
    # --file is validated manually below so that --check-pillow can be used
    # on its own, as its help text promises.
    parser.add_argument(
        "--file",
        help="Path to the image file to test (required unless --check-pillow is given)",
    )
    parser.add_argument(
        "--check-pillow", action="store_true", help="Only check PIL/Pillow installation"
    )
    args = parser.parse_args()

    if args.file is None and not args.check_pillow:
        # Same message and exit status argparse produces for required=True.
        parser.error("the following arguments are required: --file")

    # Check PIL/Pillow installation
    print("🔧 Checking PIL/Pillow installation...")
    if not check_pillow_installation():
        return 1
    if args.check_pillow:
        print("✅ PIL/Pillow installation check passed!")
        return 0

    # Run the parsing test
    try:
        success = test_image_format_parsing(args.file)
        return 0 if success else 1
    except KeyboardInterrupt:
        print("\n⏹️ Test interrupted by user")
        return 1
    except Exception as e:
        print(f"\n❌ Unexpected error: {str(e)}")
        return 1
# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)