You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
31 lines
883 B
31 lines
883 B
# 导入正则表达式模块
|
|
import re
|
|
|
|
|
|
# Matches one or more consecutive characters in the range U+4E00-U+9FFF
# (the CJK Unified Ideographs block). Compiled once so the per-line loop
# does not repeat the pattern lookup.
_CHINESE_RUN_RE = re.compile(r'[\u4e00-\u9fff]+')


def extract_chinese_surnames(file_path):
    """Collect every run of Chinese characters found in a UTF-8 text file.

    The file is read line by line. Each maximal run of consecutive
    characters in U+4E00-U+9FFF becomes one entry in the result, in the
    order encountered. Duplicates are kept. No check is made that a run is
    actually a surname; any non-matching character (ASCII, digits,
    punctuation, whitespace) simply separates runs.

    Args:
        file_path: Path of the text file to scan; opened as UTF-8.

    Returns:
        A list of strings, one per matched run. Empty if nothing matched.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    surnames = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # findall returns [] for lines without a match, so extending
            # unconditionally is equivalent to guarding on a non-empty result.
            surnames.extend(_CHINESE_RUN_RE.findall(line))
    return surnames
|
|
|
|
|
|
# Call the function with the path of the file to scan.
file_path = '../db.txt'  # replace this with the actual path to your file
surnames = extract_chinese_surnames(file_path)

# Print the full list of extracted entries.
print(surnames)

# Print whether '南宫' appears as an exact entry in the list.
print('南宫' in surnames)
|