89 lines
3.2 KiB
Python
89 lines
3.2 KiB
Python
|
import hashlib
|
|||
|
|
|||
|
import cv2
|
|||
|
import numpy as np
|
|||
|
import os
|
|||
|
import logging
|
|||
|
|
|||
|
# Obtain this module's own logger directly (no repeated configuration needed here).
logger = logging.getLogger(__name__)
|
|||
|
|
|||
|
|
|||
|
def _find_cut_segments(blank_rows, target_height, min_blank_rows):
    """Compute the row ranges of the pieces an image should be cut into.

    Args:
        blank_rows: Sequence of booleans, one per image row, True where the
            row is entirely white.
        target_height: Number of rows a piece must accumulate before blank
            rows start counting towards a cut.
        min_blank_rows: Number of blank rows (counted cumulatively, they do
            not have to be adjacent) that must be seen past ``target_height``
            before the piece is closed.

    Returns:
        A list of half-open ``(start, end)`` row ranges, top to bottom.
    """
    total_rows = len(blank_rows)
    segments = []
    start = 0
    blank_seen = 0

    for y, is_blank in enumerate(blank_rows):
        # Rows accumulated in the current piece, row y included.
        accumulated = y - start + 1
        if accumulated < target_height or not is_blank:
            continue

        blank_seen += 1
        if blank_seen < min_blank_rows:
            continue

        # Cut here. Row y is white and acts as the separator: it belongs to
        # neither the piece that ends nor the one that starts at y + 1.
        if y > start:
            segments.append((start, y))
        start = y + 1
        blank_seen = 0

    # Whatever is left below the last cut forms the final piece.
    if start < total_rows:
        segments.append((start, total_rows))
    return segments


def split_image_by_height_and_blank(
    input_path,
    output_dir='split_images',
    target_height=500,
    *,
    min_blank_rows=10,
    pad_height=10,
):
    """Split a tall image into pieces, cutting only on fully white rows.

    Rows are accumulated from the top. Once the current piece holds at least
    ``target_height`` rows, every fully white row (all grayscale pixels equal
    to 255) is counted; when ``min_blank_rows`` such rows have been counted
    the piece is closed at that row. Each piece gets ``pad_height`` white rows
    prepended and is written as ``<n>.png`` (numbered from 1) into a
    sub-directory of ``output_dir`` named after the MD5 hex digest of the
    input path string.

    Args:
        input_path: Path of the image to split.
        output_dir: Base output directory; the pieces go into
            ``<output_dir>/<md5 of input_path>``.
        target_height: Minimum number of rows a piece accumulates before a
            cut is looked for.
        min_blank_rows: How many white rows must be counted past
            ``target_height`` before cutting.
        pad_height: Number of white rows added on top of every piece; 0
            disables the padding.

    Returns:
        List of the written PNG paths, in top-to-bottom order.

    Raises:
        ValueError: If ``min_blank_rows`` is below 1 or ``pad_height`` is
            negative.
        FileNotFoundError: If ``cv2.imread`` cannot load ``input_path``.
        OSError: If a piece cannot be written.
    """
    if min_blank_rows < 1:
        raise ValueError(f"min_blank_rows 必须大于等于 1: {min_blank_rows}")
    if pad_height < 0:
        raise ValueError(f"pad_height 不能为负数: {pad_height}")

    input_path = os.fspath(input_path)

    # Load first so an unreadable input does not leave an empty directory.
    img = cv2.imread(input_path)
    if img is None:
        raise FileNotFoundError(f"无法读取图片: {input_path}")

    # The digest is taken over the path string, not the file content, so the
    # same input path always maps to the same output directory.
    md5 = hashlib.md5(input_path.encode()).hexdigest()
    output_dir = os.path.join(output_dir, md5)
    os.makedirs(output_dir, exist_ok=True)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height, width = gray.shape
    logger.info("原始图片尺寸: %sx%s", width, height)

    # One vectorised pass: a row is blank only if every pixel is pure white.
    blank_rows = (gray == 255).all(axis=1).tolist()
    segments = _find_cut_segments(blank_rows, target_height, min_blank_rows)

    # The white strip is identical for every piece, so build it once.
    padding = None
    if pad_height > 0:
        padding = np.full((pad_height,) + img.shape[1:], 255, dtype=img.dtype)

    sub_image_paths = []
    for index, (top, bottom) in enumerate(segments, 1):
        piece = img[top:bottom]
        if padding is not None:
            piece = cv2.vconcat([padding, piece])

        sub_path = os.path.join(output_dir, f"{index}.png")
        # PNG compression level 0 = no compression.
        written = cv2.imwrite(
            sub_path, piece, [int(cv2.IMWRITE_PNG_COMPRESSION), 0]
        )
        if not written:
            # imwrite signals failure through its return value; do not report
            # a path that was never written.
            raise OSError(f"无法写入图片: {sub_path}")

        sub_image_paths.append(sub_path)
        # Logged size is the final size, padding included.
        logger.info(
            "保存子图片: %s (尺寸: %sx%s)", sub_path, piece.shape[1], piece.shape[0]
        )

    logger.info("分割完成,共生成%s个子图片", len(sub_image_paths))
    return sub_image_paths
|