You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
|
# Module-level settings: a base directory plus the paths and image/time
# parameters built from it. All paths use Windows-style backslash separators.

# Base path configuration
basePath = r'D:\dsWork\PaddleOCR'

# PDF file configuration
# Raw string: the backslashes in the relative part are literal path separators.
source_pdf = basePath + r'\大数据研究苏轼\[047.中国古典文学基本丛书.苏轼词编年校注].王宗堂,邹同庆撰.扫描版.pdf'

# Image processing configuration
images_dir = f"{basePath}\\Images"
compressed_images_dir = f"{basePath}\\ImagesSmall"
target_width = 587  # target width (default 587 pixels)
image_quality = 85  # JPEG compression quality

# Time format (strftime-style directive string)
time_format = '%Y-%m-%d %H:%M:%S'

# OCR result output configuration
ocr_output_dir = f"{basePath}\\KeCheng\\Txt"
# NOTE(review): resolves to the same directory as ocr_output_dir — confirm this is intended.
markdown_output_dir = f"{basePath}\\KeCheng\\Txt"
|