"""Run the PP-StructureV3 pipeline on one image and save the result as Markdown."""
import logging
from pathlib import Path  # imported by the original script; unused in the visible code

from paddleocr import PPStructureV3

# Module-specific logger, obtained directly (no additional configuration here).
logger = logging.getLogger(__name__)

# --- Disabled draft of a batch helper, kept for reference -------------------
# NOTE(review): before re-enabling, `os` must be imported and `array` is not a
# name in scope (a type such as `list[str]` was presumably intended).
#
# # Create the model
# pipeline = PPStructureV3(use_doc_orientation_classify=False, use_doc_unwarping=False)
#
# def ocrWithPPStructureV3(image_paths: array):
#     # Run OCR on each of the given images
#     for image_path in image_paths:
#         logger.info(f"Recognizing image: {image_path}")
#         # The parent directory name is used as the output sub-folder
#         md5 = image_path.replace("\\", "/").split("/")[-2]
#         output = pipeline.predict(input=image_path)
#         output_path = f"output/{md5}"
#         # Create the output directory if it does not exist
#         if not os.path.exists(output_path):
#             logger.info(f"Creating directory: {output_path}")
#             # NOTE(review): `True` is passed positionally as `mode` here;
#             # `exist_ok=True` was probably intended.
#             os.makedirs(output_path, True)
#         for res in output:
#             print(res)
#             res.save_to_markdown(save_path=output_path)
#             logger.info(f"Saved successfully to {output_path}")
# -----------------------------------------------------------------------------

# Image to analyse (absolute path on the author's machine).
img_path = r'D:\dsWork\dsProject\dsLightRag\Test\split_images\a62dce9d67c818accf94113aabefe172\3.png'

# Build the pipeline with orientation classification and unwarping switched off.
pipeline = PPStructureV3(
    use_doc_orientation_classify=False,
    use_doc_unwarping=False,
)

# Run prediction on the image and write every result under "output" as Markdown.
output = pipeline.predict(input=img_path)
for res in output:
    res.save_to_markdown(save_path="output")