'commit'

2 weeks ago · 841c0d929f
parent f19d70ac8c
commit 841c0d929f
8 changed files with 154 additions and 54 deletions
--- a/AI/Ocr/Test.py
+++ b/AI/Ocr/Test.py
@ -1,54 +0,0 @@
-import easyocr
-import os
-import re
-import pandas as pd
-
-
-class id_card_ocr():
-
-    def __init__(self):  # 文件位置
-        self.images = r'D:/id_card'  # 需要注意的是，图片文件的名称不能有汉字，否则会报错~
-
-    def ocr_reader(self):  # 创建ocr对象，识别中英文
-        ocr = easyocr.Reader(['ch_sim', 'en'], gpu=True)
-        return ocr
-
-    def read_content(self):  # 识别图片文字,并遍历
-        data = []
-        for image in os.listdir(self.images):
-            content = self.ocr_reader().readtext(f'{self.images}/{image}', detail=0)
-            content = ''.join(content)  # 列表转换为纯文本
-            new_content = content.replace(" ", "")  # 去除掉空格内容
-            print(f'正在识别:{image}')
-            name = re.findall(r'名(.*?)性', new_content)
-            gender = re.findall(r'别(.*?)民族|民', new_content)
-            nation = re.findall(r'族|民族(.*?)出', new_content)
-            address = re.findall(r'址(.*?)公', new_content)
-            number = re.findall(r'身份号码(\d+)', new_content)
-
-            new_name = ''.join(name)
-            new_gender = ''.join(gender)
-            new_nation = ''.join(nation)
-            new_address = ''.join(address)
-            new_number = ''.join(number)
-            if len(new_number) == 18:  # 判断身份证的位数
-                pass
-            elif len(new_number) == 17:
-                new_number = new_number + "X"
-            print(f'完成识别:{image}')
-            data.append([new_name, new_gender, new_nation, new_address, new_number])
-        print(data)
-        return data
-
-    def read_to_excel(self):
-        df = pd.DataFrame(self.read_content(), columns=['姓名', '性别', '民族', '地址', '身份证号码'])
-        print(f'识别结果如下：')
-        print(df)
-        df.to_excel(r'D:/id_card/识别结果.xlsx', index=False)
-        return df
-
-
-if __name__ == '__main__':
-    info = id_card_ocr()
-    info.read_content()
-    info.read_to_excel()
--- a/AI/Ocr/init.py
+++ b/AI/Ocr/init.py
--- a/AI/Ocr/步骤.txt
+++ b/AI/Ocr/步骤.txt
@ -0,0 +1,4 @@
+conda activate py310
+python3 -m pip install paddlepaddle-gpu -i https://mirror.baidu.com/pypi/simple
+
+pip install "paddleocr>=3.0.1"
--- a/AI/Pdf/T1_SplitPdf.py
+++ b/AI/Pdf/T1_SplitPdf.py
@ -0,0 +1,7 @@
+import fitz  # PyMuPDF
+source=r'D:\BaiduNetdiskDownload\大数据研究苏轼\[047.中国古典文学基本丛书.苏轼词编年校注].王宗堂,邹同庆撰.扫描版.pdf'
+pdf = fitz.open(source)
+for page_num in range(len(pdf)):
+    page = pdf.load_page(page_num)
+    pix = page.get_pixmap(dpi=300)
+    pix.save(f"D:/usr/page_{page_num}.png")
--- a/AI/Pdf/处理步骤.docx
+++ b/AI/Pdf/处理步骤.docx
--- a/AI/Pdf/配置环境.md
+++ b/AI/Pdf/配置环境.md
@ -0,0 +1,92 @@
+### 1、查看有哪些环境
+```cmd
+conda info --envs
+```
+```text
+# conda environments:
+#
+base                     D:\anaconda3
+py310                    D:\anaconda3\envs\py310
+                      *  d:\anaconda3
+```
+### 2、激活环境
+```cmd
+conda activate py310
+```
+
+### 3、设置为默认源
+升级pip + 配置pip使用清华源 
+```cmd
+python -m pip install --upgrade pip
+pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+```
+
+### 4、安装依赖包
+```shell
+pip install PyMuPDF 
+pip install opencv-python paddleocr
+
+
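+# 版本兼容性是深度学习应用中常见的问题，特别是当库频繁更新时。推荐使用PaddlePaddle 2.5.2 + PaddleOCR最新版的组合，这通常是最稳定的配置。注意：该组合仅适用于 PaddleOCR 2.x；PaddleOCR 3.x（paddleocr>=3.0.1）需要 PaddlePaddle 3.0 及以上版本，否则会出现 AI/错误日志.txt 中的 AttributeError（AnalysisConfig 没有 set_optimization_level）。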
+
+# 没用GPU的用这个
+pip install paddlepaddle==2.5.2
+# 有GPU的用这个
+pip install paddlepaddle-gpu==2.5.2
+
+# 卸载
+pip uninstall paddlepaddle==2.5.2 -y
+pip uninstall paddlepaddle-gpu==2.5.2 -y
+
+# 更新
+pip install --upgrade paddleocr
+```
+
+【保姆级】Windows 安装 CUDA 和 cuDNN
+https://sspai.com/post/98397
+https://developer.download.nvidia.com/compute/cuda/12.9.1/local_installers/cuda_12.9.1_576.57_windows.exe
+
+10402852@qq.com
+dsideal4r5t6y7u
+
+
+[正确安装GPU显卡驱动、CUDA、cuDNN的详细教程](https://blog.csdn.net/qq_62928482/article/details/139674918)
+
+我的笔记本显卡：NVIDIA GeForce RTX 3060 Laptop GPU
+
+
+
+**nvidia-smi**
+
+
+
+NVIDIA-SMI 576.57                 Driver Version: 576.57         CUDA Version: 12.9  
+
+
+
+[CUDA - Wikipedia](https://en.wikipedia.org/wiki/CUDA)
+
+![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250618074946862.png)
+
+![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250618075233227.png)
+
+
+https://developer.download.nvidia.cn/compute/cuda/11.4.4/local_installers/cuda_11.4.4_472.50_windows.exe
+
+
+
+10402852@qq.com
+
+dsideal4r5t6y7u
+
+
+方案介绍：
+
+https://paddlepaddle.github.io/PaddleOCR/latest/version3.x/algorithm/PP-StructureV3/PP-StructureV3.html
+
+使用教程：
+
+https://paddlepaddle.github.io/PaddleOCR/latest/version3.x/pipeline_usage/PP-StructureV3.html
+
+
+paddleocr pp_structurev3 -i https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/pp_structure_v3_demo.png
--- a/AI/错误日志.txt
+++ b/AI/错误日志.txt
@ -0,0 +1,51 @@
+D:\anaconda3\envs\py310\python.exe D:\dsWork\QingLong\AI\Pdf\T2_AutoAll.py
+Creating model: ('PP-LCNet_x1_0_doc_ori', None)
+Using official model (PP-LCNet_x1_0_doc_ori), the model files will be automatically downloaded and saved in C:\Users\Administrator\.paddlex\official_models.
+Traceback (most recent call last):
+  File "D:\dsWork\QingLong\AI\Pdf\T2_AutoAll.py", line 29, in <module>
+    pdf_to_text(source, target)
+  File "D:\dsWork\QingLong\AI\Pdf\T2_AutoAll.py", line 10, in pdf_to_text
+    ocr = PaddleOCR(use_textline_orientation=True, lang="ch", ocr_version='PP-OCRv3')
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddleocr\_pipelines\ocr.py", line 161, in __init__
+    super().__init__(**base_params)
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddleocr\_pipelines\base.py", line 63, in __init__
+    self.paddlex_pipeline = self._create_paddlex_pipeline()
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddleocr\_pipelines\base.py", line 97, in _create_paddlex_pipeline
+    return create_pipeline(config=self._merged_paddlex_config, **kwargs)
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\__init__.py", line 165, in create_pipeline
+    pipeline = BasePipeline.get(pipeline_name)(
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\utils\deps.py", line 195, in _wrapper
+    return old_init_func(self, *args, **kwargs)
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 103, in __init__
+    self._pipeline = self._create_internal_pipeline(config, self.device)
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 158, in _create_internal_pipeline
+    return self._pipeline_cls(
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\ocr\pipeline.py", line 73, in __init__
+    self.doc_preprocessor_pipeline = self.create_pipeline(
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\base.py", line 140, in create_pipeline
+    pipeline = create_pipeline(
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\__init__.py", line 165, in create_pipeline
+    pipeline = BasePipeline.get(pipeline_name)(
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\utils\deps.py", line 195, in _wrapper
+    return old_init_func(self, *args, **kwargs)
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 103, in __init__
+    self._pipeline = self._create_internal_pipeline(config, self.device)
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 158, in _create_internal_pipeline
+    return self._pipeline_cls(
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\doc_preprocessor\pipeline.py", line 67, in __init__
+    self.doc_ori_classify_model = self.create_model(doc_ori_classify_config)
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\base.py", line 107, in create_model
+    model = create_predictor(
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\__init__.py", line 77, in create_predictor
+    return BasePredictor.get(model_name)(
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\image_classification\predictor.py", line 49, in __init__
+    self.preprocessors, self.infer, self.postprocessors = self._build()
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\image_classification\predictor.py", line 82, in _build
+    infer = self.create_static_infer()
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\base\predictor\base_predictor.py", line 240, in create_static_infer
+    return PaddleInfer(self.model_dir, self.MODEL_FILE_PREFIX, self._pp_option)
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\common\static_infer.py", line 274, in __init__
+    self.predictor = self._create()
+  File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\common\static_infer.py", line 360, in _create
+    config.set_optimization_level(3)
+AttributeError: 'paddle.base.libpaddle.AnalysisConfig' object has no attribute 'set_optimization_level'. Did you mean: 'tensorrt_optimization_level'?
--- a/WebRoot/upload/temp_upload/compressed_水杯.png
+++ b/WebRoot/upload/temp_upload/compressed_水杯.png