import os
import re

import easyocr
import pandas as pd

# File extensions treated as ID-card images. Anything else found in the image
# folder (notably the Excel report this script writes there) is skipped.
_IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')

# Field patterns, applied to the OCR text after all spaces are removed.
# Each value is delimited by the printed labels that surround it on the card.
# The alternations are wrapped in non-capturing groups so that "|" only
# chooses between label spellings instead of splitting the whole pattern.
_NAME_RE = re.compile(r'名(.*?)性')
_GENDER_RE = re.compile(r'别(.*?)(?:民族|民)')
_NATION_RE = re.compile(r'(?:民族|族)(.*?)出')
# Anchor on the full label so an address containing "公" is not cut short.
_ADDRESS_RE = re.compile(r'址(.*?)公民身份号码')
_NUMBER_RE = re.compile(r'身份号码(\d+)')


class id_card_ocr():
    """Extract the printed fields of ID-card images in a folder using EasyOCR."""

    def __init__(self, images=r'D:/id_card'):
        """Remember the folder that holds the ID-card images.

        Args:
            images: Path of the folder to scan. Defaults to ``D:/id_card``.
        """
        # NOTE (from the original author): image file names must not contain
        # Chinese characters, otherwise recognition raises an error.
        self.images = images

    def ocr_reader(self):
        """Create an EasyOCR reader for simplified Chinese and English.

        Returns:
            A new ``easyocr.Reader`` requested with ``gpu=True``.
        """
        return easyocr.Reader(['ch_sim', 'en'], gpu=True)

    @staticmethod
    def _first_group(pattern, text):
        """Return group 1 of the first match of *pattern* in *text*, or ''."""
        match = pattern.search(text)
        return match.group(1) if match else ''

    def _parse_fields(self, text):
        """Split whitespace-free OCR text into the five card fields.

        Args:
            text: The recognized text of one card with spaces removed.

        Returns:
            ``[name, gender, nation, address, number]``; a field that cannot
            be located is an empty string.
        """
        number = self._first_group(_NUMBER_RE, text)
        # The pattern captures digits only, so a number whose last character
        # is the check letter comes back with 17 digits; restore the "X".
        if len(number) == 17:
            number += "X"
        return [
            self._first_group(_NAME_RE, text),
            self._first_group(_GENDER_RE, text),
            self._first_group(_NATION_RE, text),
            self._first_group(_ADDRESS_RE, text),
            number,
        ]

    def read_content(self):
        """Run OCR on every image in ``self.images`` and parse the fields.

        Returns:
            A list with one ``[name, gender, nation, address, number]`` row
            per recognized image.
        """
        # Build the reader once; constructing it per image reloads the models.
        reader = self.ocr_reader()
        data = []
        for image in sorted(os.listdir(self.images)):
            # Skip non-image files such as a report left by a previous run.
            if not image.lower().endswith(_IMAGE_SUFFIXES):
                continue
            print(f'正在识别:{image}')
            pieces = reader.readtext(f'{self.images}/{image}', detail=0)
            # Join the recognized fragments and drop spaces so the label
            # patterns can match across fragment boundaries.
            text = ''.join(pieces).replace(" ", "")
            data.append(self._parse_fields(text))
            print(f'完成识别:{image}')
        print(data)
        return data

    def read_to_excel(self):
        """Recognize all images and save the result as an Excel sheet.

        The report is written to ``识别结果.xlsx`` inside ``self.images``.

        Returns:
            The ``pandas.DataFrame`` that was written.
        """
        df = pd.DataFrame(self.read_content(),
                          columns=['姓名', '性别', '民族', '地址', '身份证号码'])
        print('识别结果如下:')
        print(df)
        df.to_excel(f'{self.images}/识别结果.xlsx', index=False)
        return df


if __name__ == '__main__':
    info = id_card_ocr()
    # read_to_excel() already runs read_content(); calling both would OCR
    # every image twice.
    info.read_to_excel()