89 lines
3.2 KiB
Python
89 lines
3.2 KiB
Python
import hashlib
|
||
|
||
import cv2
|
||
import numpy as np
|
||
import os
|
||
import logging
|
||
|
||
# 直接获取模块专属日志器(无需重复配置)
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
def _find_split_segments(blank_rows, target_height, blank_lines_required):
    """Compute the ``(start, end)`` row ranges an image should be cut into.

    Args:
        blank_rows: 1-D boolean array, one entry per image row, True where
            the row is completely white.
        target_height: Minimum number of rows a segment must reach before
            blank rows start being counted towards a cut.
        blank_lines_required: Number of blank rows (cumulative, not
            necessarily consecutive) that must be seen after the target
            height is reached before the segment is closed.

    Returns:
        A list of ``(start, end)`` tuples with ``end`` exclusive. The blank
        row that triggers a cut is not included in any segment.
    """
    total_rows = len(blank_rows)
    segments = []
    start_y = 0
    blank_seen = 0

    # Only blank rows can advance the counter or trigger a cut, so visiting
    # just those rows is equivalent to scanning every row of the image.
    for y in np.flatnonzero(blank_rows).tolist():
        # Rows in the current segment so far, including row y.
        if y - start_y + 1 < target_height:
            continue

        blank_seen += 1
        if blank_seen < blank_lines_required:
            continue

        # With small thresholds the cut row can be the segment's first row;
        # never emit an empty segment in that case.
        if y > start_y:
            segments.append((start_y, y))
        start_y = y + 1
        blank_seen = 0

    # Whatever remains after the last cut forms the final segment.
    if start_y < total_rows:
        segments.append((start_y, total_rows))

    return segments


def split_image_by_height_and_blank(
    input_path,
    output_dir='split_images',
    target_height=500,
    blank_lines_required=10,
    top_padding=10,
):
    """Split a tall image into sub-images, cutting only on blank (white) rows.

    Rows are accumulated from the top. Once a segment is at least
    ``target_height`` rows tall, fully white rows (every grayscale pixel equal
    to 255) are counted; when ``blank_lines_required`` of them have been seen
    the segment is closed at that row and a new one starts on the next row.
    Each sub-image gets ``top_padding`` white rows prepended and is written as
    an uncompressed PNG named ``1.png``, ``2.png``, ... inside
    ``<output_dir>/<md5 of input_path>``.

    Args:
        input_path: Path of the image to split (a ``str``).
        output_dir: Base output directory; a sub-directory named after the
            MD5 hex digest of ``input_path`` is created inside it.
        target_height: Minimum segment height before blank rows are counted.
        blank_lines_required: Blank rows to accumulate before cutting.
        top_padding: Number of white rows added above every sub-image;
            ``0`` disables the padding.

    Returns:
        List of the written sub-image paths, in top-to-bottom order.

    Raises:
        ValueError: If ``blank_lines_required`` < 1 or ``top_padding`` < 0.
        FileNotFoundError: If the image cannot be read.
        OSError: If a sub-image cannot be written.
    """
    if blank_lines_required < 1:
        raise ValueError(f"blank_lines_required 必须大于等于 1: {blank_lines_required}")
    if top_padding < 0:
        raise ValueError(f"top_padding 不能为负数: {top_padding}")

    # Read the image before touching the filesystem so that an unreadable
    # input does not leave an empty output directory behind.
    img = cv2.imread(input_path)
    if img is None:
        raise FileNotFoundError(f"无法读取图片: {input_path}")

    # The output sub-directory is keyed by the MD5 of the path string (not of
    # the file contents), so the same input path always maps to the same place.
    md5 = hashlib.md5(input_path.encode()).hexdigest()
    output_dir = os.path.join(output_dir, md5)
    os.makedirs(output_dir, exist_ok=True)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height, width = gray.shape
    logger.info("原始图片尺寸: %sx%s", width, height)

    # A row is blank only if every one of its pixels is pure white.
    blank_rows = np.all(gray == 255, axis=1)
    segments = _find_split_segments(blank_rows, target_height, blank_lines_required)

    sub_image_paths = []
    for i, (sy, ey) in enumerate(segments, 1):
        sub_img = img[sy:ey, :]

        if top_padding > 0:
            # White strip with the same width, channel layout and dtype as
            # the sub-image, stacked on top of it.
            padding = np.full(
                (top_padding,) + sub_img.shape[1:], 255, dtype=sub_img.dtype
            )
            sub_img = cv2.vconcat([padding, sub_img])

        sub_path = os.path.join(output_dir, f"{i}.png")
        # PNG compression level 0 = no compression.
        written = cv2.imwrite(
            sub_path, sub_img, [int(cv2.IMWRITE_PNG_COMPRESSION), 0]
        )
        if not written:
            # imwrite signals failure through its return value; surface it
            # instead of returning a path to a file that does not exist.
            raise OSError(f"无法写入图片: {sub_path}")

        sub_image_paths.append(sub_path)
        # Logged size is the size actually written, i.e. including padding.
        logger.info(
            "保存子图片: %s (尺寸: %sx%s)", sub_path, sub_img.shape[1], sub_img.shape[0]
        )

    logger.info("分割完成,共生成%s个子图片", len(sub_image_paths))
    return sub_image_paths
|