You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

18 lines
609 B

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# --- Base path -------------------------------------------------------------
# Root folder (Windows drive path); every other path below is derived from it.
basePath = 'D:\\dsWork\\PaddleOCR'

# --- PDF input -------------------------------------------------------------
# Scanned PDF used as the source document.
source_pdf = f"{basePath}\\大数据研究苏轼\\[047.中国古典文学基本丛书.苏轼词编年校注].王宗堂,邹同庆撰.扫描版.pdf"

# --- Image processing ------------------------------------------------------
# Folder for page images and folder for their compressed counterparts.
images_dir = basePath + r'\Images'
compressed_images_dir = basePath + r'\ImagesSmall'
# Target width, 587 pixels by default.
target_width = 587
# JPEG compression quality.
image_quality = 85

# --- Time format -----------------------------------------------------------
# strftime-style pattern, e.g. 2024-01-31 08:15:00.
time_format = "%Y-%m-%d %H:%M:%S"

# --- OCR result output -----------------------------------------------------
# Both output settings currently resolve to the same folder.
ocr_output_dir = basePath + r'\KeCheng\Txt'
markdown_output_dir = basePath + r'\KeCheng\Txt'