You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

54 lines
2.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import easyocr
import os
import re
import pandas as pd
class id_card_ocr():
    """Batch-recognise Chinese ID-card photos with EasyOCR and export the fields.

    Every image file found in ``self.images`` is run through OCR; the name,
    gender, ethnicity, address and ID number are extracted from the recognised
    text with regular expressions, and the rows can be written to Excel.
    """

    # Only files with these (case-insensitive) extensions are sent to the OCR
    # engine.  Without a filter the result workbook written into the same
    # folder, or any sub-directory, would be fed to the image reader on the
    # next run.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff', '.webp')

    # Field patterns, applied to the OCR text after spaces are removed.  Each
    # one captures the text between two printed labels on the card.
    _NAME_RE = re.compile(r'名(.*?)性')
    # The original pattern r'别(.*?)民族|民' parsed as "别(.*?)民族" OR a bare
    # "民", so the fallback branch captured nothing.  Stopping at the first
    # "民" covers both the full label "民族" and a partially recognised one.
    _GENDER_RE = re.compile(r'别(.*?)民')
    # Same precedence problem in r'族|民族(.*?)出': the bare "族" branch had no
    # capture.  Anchoring on "族" covers "民族" as well.
    _NATION_RE = re.compile(r'族(.*?)出')
    _ADDRESS_RE = re.compile(r'址(.*?)公')
    _NUMBER_RE = re.compile(r'身份号码(\d+)')

    def __init__(self, images=r'D:/id_card'):
        """Remember the folder that holds the ID-card images.

        Args:
            images: Directory containing the pictures to recognise.

        Note (from the original author): image file names must not contain
        Chinese characters, otherwise reading them raises an error.
        """
        self.images = images

    def ocr_reader(self):
        """Return a new EasyOCR reader for simplified Chinese and English.

        The reader is created with ``gpu=True``.  Building a reader is
        expensive, so callers should create one and reuse it.
        """
        return easyocr.Reader(['ch_sim', 'en'], gpu=True)

    @staticmethod
    def _first_group(pattern, text):
        """Return group 1 of the first match of *pattern* in *text*, or ''."""
        match = pattern.search(text)
        return match.group(1) if match else ''

    @classmethod
    def _parse_fields(cls, text):
        """Extract ``[name, gender, nation, address, number]`` from OCR text.

        Args:
            text: All recognised text of one card joined into a single string.

        Returns:
            A list of five strings; a field that cannot be found is ``''``.
        """
        compact = text.replace(" ", "")  # OCR output often has stray spaces
        number = cls._first_group(cls._NUMBER_RE, compact)
        # ``\d+`` cannot capture a trailing check character "X", so a 17-digit
        # result is taken to be a number whose last character is "X".
        if len(number) == 17:
            number += "X"
        return [
            cls._first_group(cls._NAME_RE, compact),
            cls._first_group(cls._GENDER_RE, compact),
            cls._first_group(cls._NATION_RE, compact),
            cls._first_group(cls._ADDRESS_RE, compact),
            number,
        ]

    def _image_files(self):
        """Return the sorted names of the image files inside ``self.images``."""
        extensions = tuple(self.IMAGE_EXTENSIONS)
        return sorted(
            entry for entry in os.listdir(self.images)
            if entry.lower().endswith(extensions)
            and os.path.isfile(os.path.join(self.images, entry))
        )

    def read_content(self):
        """OCR every image in the folder and return the parsed rows.

        Returns:
            A list with one ``[name, gender, nation, address, number]`` list
            per image, in sorted file-name order.  Empty when the folder holds
            no image files.
        """
        images = self._image_files()
        # Build the reader once for the whole batch (not once per image), and
        # not at all when there is nothing to recognise.
        reader = self.ocr_reader() if images else None
        data = []
        for image in images:
            print(f'正在识别:{image}')
            content = reader.readtext(f'{self.images}/{image}', detail=0)
            # detail=0 yields a list of text fragments; join them into one string.
            data.append(self._parse_fields(''.join(content)))
            print(f'完成识别:{image}')
        print(data)
        return data

    def read_to_excel(self, output_path=None):
        """Run the recognition and write the result table to an Excel file.

        Args:
            output_path: Destination workbook.  Defaults to ``识别结果.xlsx``
                inside ``self.images``.

        Returns:
            The ``pandas.DataFrame`` that was written.
        """
        if output_path is None:
            output_path = f'{self.images}/识别结果.xlsx'
        df = pd.DataFrame(self.read_content(), columns=['姓名', '性别', '民族', '地址', '身份证号码'])
        print('识别结果如下:')
        print(df)
        df.to_excel(output_path, index=False)
        return df
if __name__ == '__main__':
    # read_to_excel() performs the OCR pass itself, so calling read_content()
    # first would only repeat the whole (slow) recognition and discard it.
    info = id_card_ocr()
    info.read_to_excel()