40 lines
1.3 KiB
Python
40 lines
1.3 KiB
Python
import logging
|
|
|
|
# 直接获取模块专属日志器(无需重复配置)
|
|
# Module-level logger keyed by this module's __name__.
# NOTE(review): none of the live statements in the visible part of this file
# call it; the only logger.info(...) calls are inside the commented-out
# ocrWithPPStructureV3 helper.
logger = logging.getLogger(__name__)
|
|
|
|
# 创建模型
|
|
# pipeline = PPStructureV3(use_doc_orientation_classify=False,use_doc_unwarping=False)
|
|
|
|
|
|
# def ocrWithPPStructureV3(image_paths: array):
|
|
# # 对给定图片进行OCR识别
|
|
# for image_path in image_paths:
|
|
# logger.info(f"正在识别图片:{image_path}")
|
|
# md5 = image_path.replace("\\", "/").split("/")[-2]
|
|
# output = pipeline.predict(input=image_path)
|
|
# output_path = f"output/{md5}"
|
|
# # 如果输出目录不存在,则创建
|
|
# if not os.path.exists(output_path):
|
|
# logger.info(f"创建目录:{output_path}")
|
|
# os.makedirs(output_path, True)
|
|
# for res in output:
|
|
# print(res)
|
|
# res.save_to_markdown(save_path=output_path)
|
|
# logger.info(f"成功保存到 {output_path}")
|
|
|
|
# Imports are grouped ahead of the executable statements of this script section.
# NOTE(review): Path is not referenced by the statements below; the import is
# kept because other parts of the file may rely on it.
from pathlib import Path

from paddleocr import PPStructureV3

# Absolute Windows path of the single image fed to the pipeline.
img_path = r'D:\dsWork\dsProject\dsLightRag\Test\split_images\a62dce9d67c818accf94113aabefe172\3.png'

# Constructor flags, both passed as False (named for document-orientation
# classification and document unwarping).
_pipeline_flags = {
    "use_doc_orientation_classify": False,
    "use_doc_unwarping": False,
}
pipeline = PPStructureV3(**_pipeline_flags)

# Run the pipeline on the image; the result is iterated below.
output = pipeline.predict(input=img_path)

# Ask every result to save itself as Markdown, passing "output" as the save path.
for result in output:
    result.save_to_markdown(save_path="output")