main
HuangHai 2 weeks ago
parent f19d70ac8c
commit 841c0d929f

@ -1,54 +0,0 @@
import easyocr
import os
import re
import pandas as pd
class id_card_ocr():
    """Read Chinese ID-card photos with EasyOCR and export the fields to Excel.

    Every image in ``self.images`` is OCR'd, the recognised text is joined
    into one string, and name / gender / ethnicity / address / ID number are
    extracted from it with regular expressions keyed on the card's labels.
    """

    # Only files with these extensions are sent to OCR. The result workbook is
    # written into the same folder, so without a filter a second run would try
    # to OCR the .xlsx file as well.
    IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp')
    # File name of the workbook produced by read_to_excel().
    RESULT_FILE = '识别结果.xlsx'

    def __init__(self, images=r'D:/id_card'):
        """Remember the folder that holds the ID-card images.

        Args:
            images: Folder containing the card photos. Defaults to the
                location the script was originally hard-wired to.
        """
        # NOTE (from the original author): image file names must not contain
        # Chinese characters, otherwise reading the image raises an error.
        self.images = images

    def ocr_reader(self):
        """Create an EasyOCR reader for Simplified Chinese and English."""
        return easyocr.Reader(['ch_sim', 'en'], gpu=True)

    @staticmethod
    def _first_group(pattern, text):
        """Return group 1 of the first match of *pattern* in *text*, or ''."""
        match = re.search(pattern, text)
        return match.group(1) if match else ''

    @classmethod
    def _parse_fields(cls, text):
        """Extract ``[name, gender, nation, address, number]`` from OCR text.

        Args:
            text: Recognised card text with all spaces already removed.

        Returns:
            A list of five strings; a field that cannot be found is ''.
        """
        name = cls._first_group(r'名(.*?)性', text)
        # The alternation is grouped so the capture always belongs to the
        # match; the ungrouped form let a bare "民" match with an empty capture.
        gender = cls._first_group(r'别(.*?)(?:民族|民)', text)
        nation = cls._first_group(r'(?:民族|族)(.*?)出', text)
        # Stop at the "公民" of the ID-number label rather than at the first
        # "公", which would cut addresses such as "公园路" short.
        address = cls._first_group(r'址(.*?)公民', text)
        # Accept a trailing X/x check character as part of the number.
        number = cls._first_group(r'身份号码(\d+[Xx]?)', text).upper()
        if len(number) == 17 and number.isdigit():
            # Heuristic kept from the original: 17 digits are taken to mean
            # the trailing "X" check character was not recognised.
            number += 'X'
        return [name, gender, nation, address, number]

    def read_content(self):
        """OCR every image in ``self.images`` and parse the card fields.

        Returns:
            One ``[name, gender, nation, address, number]`` list per image.
        """
        # Build the reader once; constructing it per image reloads the models.
        reader = self.ocr_reader()
        data = []
        for image in sorted(os.listdir(self.images)):
            if not image.lower().endswith(self.IMAGE_SUFFIXES):
                continue  # skip the result workbook and other non-image files
            print(f'正在识别:{image}')
            content = reader.readtext(f'{self.images}/{image}', detail=0)
            # detail=0 yields a list of text fragments: merge them and drop
            # spaces so the label-based patterns see one continuous string.
            text = ''.join(content).replace(" ", "")
            data.append(self._parse_fields(text))
            print(f'完成识别:{image}')
        print(data)
        return data

    def read_to_excel(self):
        """Run the OCR, print the result table and save it as an Excel file.

        Returns:
            The DataFrame that was written to ``<images>/识别结果.xlsx``.
        """
        df = pd.DataFrame(self.read_content(), columns=['姓名', '性别', '民族', '地址', '身份证号码'])
        print('识别结果如下:')
        print(df)
        df.to_excel(f'{self.images}/{self.RESULT_FILE}', index=False)
        return df
if __name__ == '__main__':
    # read_to_excel() runs read_content() itself, so calling read_content()
    # first would OCR every image twice.
    info = id_card_ocr()
    info.read_to_excel()

@ -0,0 +1,4 @@
# Install GPU PaddlePaddle and PaddleOCR 3.x into the py310 conda environment.
conda activate py310
# Use "python -m pip" for both installs so each package lands in the
# interpreter of the activated environment.
# NOTE(review): PaddleOCR 3.x needs PaddlePaddle 3.0+; confirm this mirror
# serves a 3.x GPU wheel matching the local CUDA version.
python -m pip install paddlepaddle-gpu -i https://mirror.baidu.com/pypi/simple
python -m pip install "paddleocr>=3.0.1"

@ -0,0 +1,7 @@
import os

import fitz  # PyMuPDF

# Scanned PDF to rasterise, and the folder that receives one PNG per page.
source = r'D:\BaiduNetdiskDownload\大数据研究苏轼\[047.中国古典文学基本丛书.苏轼词编年校注].王宗堂,邹同庆撰.扫描版.pdf'
output_dir = "D:/usr"

# Create the output folder up front so saving the first page cannot fail on
# a missing directory.
os.makedirs(output_dir, exist_ok=True)

# The context manager closes the document even if rendering a page fails.
with fitz.open(source) as pdf:
    for page_num, page in enumerate(pdf):
        # Render at 300 dpi and save as page_<zero-based index>.png.
        pix = page.get_pixmap(dpi=300)
        pix.save(f"{output_dir}/page_{page_num}.png")

Binary file not shown.

@ -0,0 +1,92 @@
### 1、查看有哪些环境
```cmd
conda info --envs
```
```text
# conda environments:
#
base D:\anaconda3
py310 D:\anaconda3\envs\py310
* d:\anaconda3
```
### 2、激活环境
```cmd
conda activate py310
```
### 3、设置为默认源
升级pip + 配置pip使用清华源
```cmd
python -m pip install --upgrade pip
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
```
### 4、安装依赖包
```shell
pip install PyMuPDF
pip install opencv-python paddleocr
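# 版本兼容性是深度学习应用中常见的问题,特别是当库频繁更新时。
# PaddleOCR 3.x(基于 PaddleX 3.x)需要 PaddlePaddle 3.0 及以上版本;
# 若搭配 PaddlePaddle 2.5.2,创建模型时会报:
#   AttributeError: 'AnalysisConfig' object has no attribute 'set_optimization_level'
# 没用GPU的用这个
pip install "paddlepaddle>=3.0.0"
# 有GPU的用这个(GPU 版需从 PaddlePaddle 官方源安装,并按本机 CUDA 版本选择 cu118 / cu126 等,以官方安装文档为准)
pip install "paddlepaddle-gpu>=3.0.0" -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
# 卸载
pip uninstall paddlepaddle -y
pip uninstall paddlepaddle-gpu -y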
# 更新
pip install --upgrade paddleocr
```
【保姆级】Windows 安装 CUDA 和 cuDNN
https://sspai.com/post/98397
https://developer.download.nvidia.com/compute/cuda/12.9.1/local_installers/cuda_12.9.1_576.57_windows.exe
(此处原为明文账号与密码,已移除:登录凭据不应提交到代码仓库,请改用密码管理器保存,并尽快修改该密码。)
[正确安装GPU显卡驱动、CUDA、cuDNN的详细教程](https://blog.csdn.net/qq_62928482/article/details/139674918)
我的笔记本显卡NVIDIA GeForce RTX 3060 Laptop GPU
**nvidia-smi**
NVIDIA-SMI 576.57 Driver Version: 576.57 CUDA Version: 12.9
[CUDA - Wikipedia](https://en.wikipedia.org/wiki/CUDA)
![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250618074946862.png)
![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250618075233227.png)
https://developer.download.nvidia.cn/compute/cuda/11.4.4/local_installers/cuda_11.4.4_472.50_windows.exe
(此处原为明文账号与密码,已移除:登录凭据不应提交到代码仓库,请改用密码管理器保存,并尽快修改该密码。)
方案介绍:
https://paddlepaddle.github.io/PaddleOCR/latest/version3.x/algorithm/PP-StructureV3/PP-StructureV3.html
使用教程:
https://paddlepaddle.github.io/PaddleOCR/latest/version3.x/pipeline_usage/PP-StructureV3.html
paddleocr pp_structurev3 -i https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/pp_structure_v3_demo.png

@ -0,0 +1,51 @@
D:\anaconda3\envs\py310\python.exe D:\dsWork\QingLong\AI\Pdf\T2_AutoAll.py
Creating model: ('PP-LCNet_x1_0_doc_ori', None)
Using official model (PP-LCNet_x1_0_doc_ori), the model files will be automatically downloaded and saved in C:\Users\Administrator\.paddlex\official_models.
Traceback (most recent call last):
File "D:\dsWork\QingLong\AI\Pdf\T2_AutoAll.py", line 29, in <module>
pdf_to_text(source, target)
File "D:\dsWork\QingLong\AI\Pdf\T2_AutoAll.py", line 10, in pdf_to_text
ocr = PaddleOCR(use_textline_orientation=True, lang="ch", ocr_version='PP-OCRv3')
File "D:\anaconda3\envs\py310\lib\site-packages\paddleocr\_pipelines\ocr.py", line 161, in __init__
super().__init__(**base_params)
File "D:\anaconda3\envs\py310\lib\site-packages\paddleocr\_pipelines\base.py", line 63, in __init__
self.paddlex_pipeline = self._create_paddlex_pipeline()
File "D:\anaconda3\envs\py310\lib\site-packages\paddleocr\_pipelines\base.py", line 97, in _create_paddlex_pipeline
return create_pipeline(config=self._merged_paddlex_config, **kwargs)
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\__init__.py", line 165, in create_pipeline
pipeline = BasePipeline.get(pipeline_name)(
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\utils\deps.py", line 195, in _wrapper
return old_init_func(self, *args, **kwargs)
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 103, in __init__
self._pipeline = self._create_internal_pipeline(config, self.device)
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 158, in _create_internal_pipeline
return self._pipeline_cls(
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\ocr\pipeline.py", line 73, in __init__
self.doc_preprocessor_pipeline = self.create_pipeline(
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\base.py", line 140, in create_pipeline
pipeline = create_pipeline(
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\__init__.py", line 165, in create_pipeline
pipeline = BasePipeline.get(pipeline_name)(
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\utils\deps.py", line 195, in _wrapper
return old_init_func(self, *args, **kwargs)
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 103, in __init__
self._pipeline = self._create_internal_pipeline(config, self.device)
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\_parallel.py", line 158, in _create_internal_pipeline
return self._pipeline_cls(
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\doc_preprocessor\pipeline.py", line 67, in __init__
self.doc_ori_classify_model = self.create_model(doc_ori_classify_config)
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\pipelines\base.py", line 107, in create_model
model = create_predictor(
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\__init__.py", line 77, in create_predictor
return BasePredictor.get(model_name)(
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\image_classification\predictor.py", line 49, in __init__
self.preprocessors, self.infer, self.postprocessors = self._build()
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\image_classification\predictor.py", line 82, in _build
infer = self.create_static_infer()
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\base\predictor\base_predictor.py", line 240, in create_static_infer
return PaddleInfer(self.model_dir, self.MODEL_FILE_PREFIX, self._pp_option)
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\common\static_infer.py", line 274, in __init__
self.predictor = self._create()
File "D:\anaconda3\envs\py310\lib\site-packages\paddlex\inference\models\common\static_infer.py", line 360, in _create
config.set_optimization_level(3)
AttributeError: 'paddle.base.libpaddle.AnalysisConfig' object has no attribute 'set_optimization_level'. Did you mean: 'tensorrt_optimization_level'?

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 MiB

Loading…
Cancel
Save