import hashlib
import logging
import os

import cv2
import numpy as np

# Module-level logger; handler and level configuration is left to the
# application that imports this module.
logger = logging.getLogger(__name__)


def _find_split_segments(is_blank_row, target_height, blank_rows_required):
    """Return half-open ``(start, end)`` row ranges covering every row.

    A segment is closed once it is at least ``target_height`` rows tall and
    ``blank_rows_required`` blank rows have been seen since that height was
    reached.

    Args:
        is_blank_row: Sequence of booleans, one per image row, true where
            the row counts as blank.
        target_height: Minimum number of rows in a segment before blank
            rows start being counted.
        blank_rows_required: Number of blank rows to count before cutting.
            Values below 1 behave like 1.

    Returns:
        List of ``(start, end)`` tuples with ``end`` exclusive. Consecutive
        segments are contiguous, so no row is dropped.
    """
    total_rows = len(is_blank_row)
    segments = []
    start_y = 0
    blank_rows_seen = 0

    for y, row_is_blank in enumerate(is_blank_row):
        rows_in_segment = y - start_y + 1
        if rows_in_segment < target_height or not row_is_blank:
            # NOTE: the blank-row counter is deliberately not reset by a
            # non-blank row; the count is cumulative, not consecutive.
            continue

        blank_rows_seen += 1
        if blank_rows_seen >= blank_rows_required:
            # End is exclusive, so ``y + 1`` keeps row ``y`` in this segment
            # instead of losing it between two neighbouring segments.
            segments.append((start_y, y + 1))
            start_y = y + 1
            blank_rows_seen = 0

    # Whatever remains after the last cut becomes the final segment.
    if start_y < total_rows:
        segments.append((start_y, total_rows))
    return segments


def split_image_by_height_and_blank(
    input_path,
    output_dir='split_images',
    target_height: int = 500,
    *,
    blank_rows_required: int = 10,
    top_padding: int = 10,
) -> list:
    """Split a tall image into sub-images, cutting only on blank rows.

    The image is scanned from top to bottom. Once the current segment is at
    least ``target_height`` rows tall, fully white rows (every grayscale
    pixel equal to 255) are counted, and the segment is cut on the
    ``blank_rows_required``-th such row. Each segment is written as
    ``<output_dir>/<md5 of input_path>/<index>.png`` (index starting at 1)
    with ``top_padding`` white rows prepended.

    Args:
        input_path: Path of the image to split (``str``, ``bytes`` or
            ``os.PathLike``).
        output_dir: Base output directory; a sub-directory named after the
            MD5 hex digest of the input path string is created inside it.
        target_height: Minimum segment height, in rows, before a cut is
            looked for.
        blank_rows_required: Number of blank rows counted (cumulatively,
            after ``target_height`` is reached) before cutting.
        top_padding: Number of white rows added above each sub-image;
            values of 0 or less add none.

    Returns:
        List of the written sub-image paths, in top-to-bottom order.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``input_path``.
        OSError: If ``cv2.imwrite`` reports that a sub-image was not written.
    """
    path_str = os.fsdecode(input_path)

    # Read the image before touching the filesystem so that an unreadable
    # input does not leave an empty output directory behind.
    img = cv2.imread(path_str)
    if img is None:
        raise FileNotFoundError(f"无法读取图片: {input_path}")

    # The MD5 of the path string only serves as a per-input directory name.
    md5 = hashlib.md5(path_str.encode()).hexdigest()
    output_dir = output_dir + f"/{md5}"
    os.makedirs(output_dir, exist_ok=True)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height, width = gray.shape
    logger.info(f"原始图片尺寸: {width}x{height}")

    # One vectorized pass: a row is blank only if every pixel is pure white.
    is_blank_row = (gray == 255).all(axis=1)
    segments = _find_split_segments(
        is_blank_row, target_height, blank_rows_required
    )

    # The white padding strip has the same width, channels and dtype for
    # every segment, so build it once.
    padding = None
    if top_padding > 0:
        padding = np.full(
            (top_padding,) + img.shape[1:], 255, dtype=img.dtype
        )

    sub_image_paths = []
    for index, (top, bottom) in enumerate(segments, 1):
        sub_img = img[top:bottom, :]
        if padding is not None and sub_img.size > 0:
            padded = cv2.vconcat([padding, sub_img])
        else:
            padded = sub_img

        sub_path = os.path.join(output_dir, f"{index}.png")
        # PNG compression level 0 means no compression.
        written = cv2.imwrite(
            sub_path, padded, [int(cv2.IMWRITE_PNG_COMPRESSION), 0]
        )
        if not written:
            raise OSError(f"无法写入图片: {sub_path}")

        sub_image_paths.append(sub_path)
        # The logged size is the size on disk, i.e. including the padding.
        logger.info(f"保存子图片: {sub_path} (尺寸: {padded.shape[1]}x{padded.shape[0]})")

    logger.info(f"分割完成,共生成{len(sub_image_paths)}个子图片")
    return sub_image_paths