diff --git a/AI/Ocr/Test.py b/AI/Ocr/Test.py deleted file mode 100644 index a4e03bdd..00000000 --- a/AI/Ocr/Test.py +++ /dev/null @@ -1,54 +0,0 @@ -import easyocr -import os -import re -import pandas as pd - - -class id_card_ocr(): - - def __init__(self): # 文件位置 - self.images = r'D:/id_card' # 需要注意的是,图片文件的名称不能有汉字,否则会报错~ - - def ocr_reader(self): # 创建ocr对象,识别中英文 - ocr = easyocr.Reader(['ch_sim', 'en'], gpu=True) - return ocr - - def read_content(self): # 识别图片文字,并遍历 - data = [] - for image in os.listdir(self.images): - content = self.ocr_reader().readtext(f'{self.images}/{image}', detail=0) - content = ''.join(content) # 列表转换为纯文本 - new_content = content.replace(" ", "") # 去除掉空格内容 - print(f'正在识别:{image}') - name = re.findall(r'名(.*?)性', new_content) - gender = re.findall(r'别(.*?)民族|民', new_content) - nation = re.findall(r'族|民族(.*?)出', new_content) - address = re.findall(r'址(.*?)公', new_content) - number = re.findall(r'身份号码(\d+)', new_content) - - new_name = ''.join(name) - new_gender = ''.join(gender) - new_nation = ''.join(nation) - new_address = ''.join(address) - new_number = ''.join(number) - if len(new_number) == 18: # 判断身份证的位数 - pass - elif len(new_number) == 17: - new_number = new_number + "X" - print(f'完成识别:{image}') - data.append([new_name, new_gender, new_nation, new_address, new_number]) - print(data) - return data - - def read_to_excel(self): - df = pd.DataFrame(self.read_content(), columns=['姓名', '性别', '民族', '地址', '身份证号码']) - print(f'识别结果如下:') - print(df) - df.to_excel(r'D:/id_card/识别结果.xlsx', index=False) - return df - - -if __name__ == '__main__': - info = id_card_ocr() - info.read_content() - info.read_to_excel() \ No newline at end of file diff --git a/AI/Ocr/__init__.py b/AI/Ocr/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/AI/Ocr/步骤.txt b/AI/Ocr/步骤.txt new file mode 100644 index 00000000..19e30fc8 --- /dev/null +++ b/AI/Ocr/步骤.txt @@ -0,0 +1,4 @@ +conda activate py310 +python3 -m pip install paddlepaddle-gpu -i 
https://mirror.baidu.com/pypi/simple + +pip install "paddleocr>=3.0.1" \ No newline at end of file diff --git a/AI/Pdf/T1_SplitPdf.py b/AI/Pdf/T1_SplitPdf.py new file mode 100644 index 00000000..8880598c --- /dev/null +++ b/AI/Pdf/T1_SplitPdf.py @@ -0,0 +1,7 @@ +import fitz # PyMuPDF +source=r'D:\BaiduNetdiskDownload\大数据研究苏轼\[047.中国古典文学基本丛书.苏轼词编年校注].王宗堂,邹同庆撰.扫描版.pdf' +pdf = fitz.open(source) +for page_num in range(len(pdf)): + page = pdf.load_page(page_num) + pix = page.get_pixmap(dpi=300) + pix.save(f"D:/usr/page_{page_num}.png") \ No newline at end of file diff --git a/AI/Pdf/处理步骤.docx b/AI/Pdf/处理步骤.docx new file mode 100644 index 00000000..09734b18 Binary files /dev/null and b/AI/Pdf/处理步骤.docx differ diff --git a/AI/Pdf/配置环境.md b/AI/Pdf/配置环境.md new file mode 100644 index 00000000..e227906f --- /dev/null +++ b/AI/Pdf/配置环境.md @@ -0,0 +1,92 @@ +### 1、查看有哪些环境 +```cmd +conda info --envs +``` +```angular2html +# conda environments: +# +base D:\anaconda3 +py310 D:\anaconda3\envs\py310 + * d:\anaconda3 +``` +### 2、激活环境 +```cmd +conda activate py310 +``` + +### 3、设置为默认源 +升级pip + 配置pip使用清华源 +``` cmd +python -m pip install --upgrade pip +pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple +``` + +### 4、安装依赖包 +```shell +pip install PyMuPDF +pip install opencv-python paddleocr + + +# 版本兼容性是深度学习应用中常见的问题,特别是当库频繁更新时。PaddlePaddle 2.5.2 只适合搭配 PaddleOCR 2.x;PaddleOCR 3.x(paddleocr>=3.0.1)需要 PaddlePaddle 3.0 及以上版本,否则会报 AnalysisConfig 没有 set_optimization_level 属性的错误。 + +# 没用GPU的用这个 +pip install paddlepaddle==2.5.2 +# 有GPU的用这个 +pip install paddlepaddle-gpu==2.5.2 + +# 卸载 +pip uninstall paddlepaddle==2.5.2 -y +pip uninstall paddlepaddle-gpu==2.5.2 -y + +# 更新 +pip install --upgrade paddleocr +``` + +【保姆级】Windows 安装 CUDA 和 cuDNN +https://sspai.com/post/98397 +https://developer.download.nvidia.com/compute/cuda/12.9.1/local_installers/cuda_12.9.1_576.57_windows.exe + +10402852@qq.com +dsideal4r5t6y7u + + +[正确安装GPU显卡驱动、CUDA、cuDNN的详细教程](https://blog.csdn.net/qq_62928482/article/details/139674918) + +我的笔记本显卡:NVIDIA GeForce RTX 3060 Laptop GPU + + + +**nvidia-smi**
+ + + +NVIDIA-SMI 576.57 Driver Version: 576.57 CUDA Version: 12.9 + + + +[CUDA - Wikipedia](https://en.wikipedia.org/wiki/CUDA) + +![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250618074946862.png) + +![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250618075233227.png) + + +https://developer.download.nvidia.cn/compute/cuda/11.4.4/local_installers/cuda_11.4.4_472.50_windows.exe + + + +10402852@qq.com + +dsideal4r5t6y7u + + +方案介绍: + +https://paddlepaddle.github.io/PaddleOCR/latest/version3.x/algorithm/PP-StructureV3/PP-StructureV3.html + +使用教程: + +https://paddlepaddle.github.io/PaddleOCR/latest/version3.x/pipeline_usage/PP-StructureV3.html + + +paddleocr pp_structurev3 -i https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/pp_structure_v3_demo.png diff --git a/AI/错误日志.txt b/AI/错误日志.txt new file mode 100644 index 00000000..129fdaf4 --- /dev/null +++ b/AI/错误日志.txt @@ -0,0 +1,51 @@ +D:\anaconda3\envs\py310\python.exe D:\dsWork\QingLong\AI\Pdf\T2_AutoAll.py +Creating model: ('PP-LCNet_x1_0_doc_ori', None) +Using official model (PP-LCNet_x1_0_doc_ori), the model files will be automatically downloaded and saved in C:\Users\Administrator\.paddlex\official_models. 
+Traceback (most recent call last): + File "D:\dsWork\QingLong\AI\Pdf\T2_AutoAll.py", line 29, in + pdf_to_text(source, target) + File "D:\dsWork\QingLong\AI\Pdf\T2_AutoAll.py", line 10, in pdf_to_text + ocr = PaddleOCR(use_textline_orientation=True, lang="ch", ocr_version='PP-OCRv3') + File "D:\anaconda3\envs\py310\lib\site-packages\paddleocr\_pipelines\ocr.py", line 161, in __init__ + super().__init__(**base_params) + File "D:\anaconda3\envs\py310\lib\site-packages\paddleocr\_pipelines\base.py", line 63, in __init__ + self.paddlex_pipeline = self._create_paddlex_pipeline() + File "D:\anaconda3\envs\py310\lib\site-packages\paddleocr\_pipelines\base.py", line 97, in _create_paddlex_pipeline + return create_pipeline(config=self._merged_paddlex_config, **kwargs) + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\__init__.py", line 165, in create_pipeline + pipeline = BasePipeline.get(pipeline_name)( + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\utils\deps.py", line 195, in _wrapper + return old_init_func(self, *args, **kwargs) + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 103, in __init__ + self._pipeline = self._create_internal_pipeline(config, self.device) + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 158, in _create_internal_pipeline + return self._pipeline_cls( + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\ocr\pipeline.py", line 73, in __init__ + self.doc_preprocessor_pipeline = self.create_pipeline( + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\base.py", line 140, in create_pipeline + pipeline = create_pipeline( + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\__init__.py", line 165, in create_pipeline + pipeline = BasePipeline.get(pipeline_name)( + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\utils\deps.py", line 195, 
in _wrapper + return old_init_func(self, *args, **kwargs) + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 103, in __init__ + self._pipeline = self._create_internal_pipeline(config, self.device) + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 158, in _create_internal_pipeline + return self._pipeline_cls( + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\doc_preprocessor\pipeline.py", line 67, in __init__ + self.doc_ori_classify_model = self.create_model(doc_ori_classify_config) + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\base.py", line 107, in create_model + model = create_predictor( + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\__init__.py", line 77, in create_predictor + return BasePredictor.get(model_name)( + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\image_classification\predictor.py", line 49, in __init__ + self.preprocessors, self.infer, self.postprocessors = self._build() + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\image_classification\predictor.py", line 82, in _build + infer = self.create_static_infer() + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\base\predictor\base_predictor.py", line 240, in create_static_infer + return PaddleInfer(self.model_dir, self.MODEL_FILE_PREFIX, self._pp_option) + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\common\static_infer.py", line 274, in __init__ + self.predictor = self._create() + File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\common\static_infer.py", line 360, in _create + config.set_optimization_level(3) +AttributeError: 'paddle.base.libpaddle.AnalysisConfig' object has no attribute 'set_optimization_level'. Did you mean: 'tensorrt_optimization_level'? 
\ No newline at end of file diff --git a/WebRoot/upload/temp_upload/compressed_水杯.png b/WebRoot/upload/temp_upload/compressed_水杯.png new file mode 100644 index 00000000..6e6f34e5 Binary files /dev/null and b/WebRoot/upload/temp_upload/compressed_水杯.png differ