You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

18 lines
609 B

1 month ago
# Project configuration: filesystem locations plus image/time settings.
# All paths are Windows-style strings derived from basePath.

# Base path: root directory that every other path below is built on.
basePath = 'D:\\dsWork\\PaddleOCR'

# PDF file: the source document, located in a subfolder of basePath.
source_pdf = (
    basePath
    + r'\大数据研究苏轼'
    + r'\[047.中国古典文学基本丛书.苏轼词编年校注].王宗堂,邹同庆撰.扫描版.pdf'
)

# Image processing: directory for images and directory for their
# compressed counterparts.
images_dir = basePath + r'\Images'
compressed_images_dir = basePath + r'\ImagesSmall'
# Target width; the default is 587 pixels.
target_width = 587
# JPEG compression quality.
image_quality = 85

# Time format.
time_format = "%Y-%m-%d %H:%M:%S"

# OCR result output: the OCR output and the Markdown output are both
# configured to the same directory, but remain separate settings.
ocr_output_dir = basePath + r'\KeCheng\Txt'
markdown_output_dir = basePath + r'\KeCheng\Txt'