You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

31 lines
883 B

# Import the regular expression module
import re
def extract_chinese_surnames(file_path):
    """Collect every run of Chinese characters found in a UTF-8 text file.

    Each line of the file is scanned for maximal runs of consecutive
    characters in the range U+4E00-U+9FFF; every run is treated as one
    surname.

    Args:
        file_path: Path of the text file to read; it is opened as UTF-8.

    Returns:
        A list of the matched strings in the order they appear in the
        file. Duplicates are kept. The list is empty when nothing matches.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Compile once, outside the loop. A match is a maximal run, so a
    # multi-character surname such as "南宫" comes back as a single item.
    chinese_run = re.compile(r'[\u4e00-\u9fff]+')
    surnames = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # findall returns [] for a line without matches, which makes
            # extend a no-op, so no explicit emptiness check is needed.
            surnames.extend(chinese_run.findall(line))
    return surnames
# Run the extraction only when this file is executed as a script, so that
# importing the module does not read a file or print anything.
if __name__ == "__main__":
    # Call the function with the path of the file to scan.
    file_path = '../db.txt'  # replace with the actual path to your file
    surnames = extract_chinese_surnames(file_path)
    # Print every extracted surname, then whether '南宫' is among them.
    print(surnames)
    print('南宫' in surnames)