You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
50 lines
2.3 KiB
50 lines
2.3 KiB
# Walk every county folder below ``working_dir``, and for each Word report
# (*.docx) derive a cleaned-up area name from its file name:
#   1. keep only CJK characters of the file name,
#   2. skip names that contain none of 市 / 县 / 区,
#   3. delete every keyword listed in ``replaceBlank.txt``,
#   4. apply every "old new" substitution listed in ``replaceText.txt``.
# The resulting name is printed as a progress message.
#
# Requirements noted by the original author:
# pip install pywin32 openpyxl
# pip install pywin32
import os
import re

# NOTE: the three imports below are not used by the code visible here; they
# are kept because other parts of the project/file may rely on them.
import openpyxl
import win32com
from win32com.client import Dispatch


def _read_blank_keywords(path):
    """Return the stripped, non-empty lines of *path*.

    Each line is a keyword that must be removed from the area name.  Empty
    lines are dropped; replacing an empty string with an empty string is a
    no-op, so this does not change the result.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        return [keyword for keyword in (line.strip() for line in handle) if keyword]


def _read_replacements(path):
    """Return the ``(old, new)`` pairs stored in *path*, one pair per line.

    A line holds the text to search for and its replacement, separated by a
    single space.  Blank lines are ignored.

    Raises:
        ValueError: if a non-blank line has no replacement field.
    """
    pairs = []
    with open(path, 'r', encoding='utf-8') as handle:
        for line_no, raw_line in enumerate(handle, start=1):
            line = raw_line.strip()
            if not line:
                # A blank line used to crash with a bare IndexError.
                continue
            fields = line.split(' ')
            if len(fields) < 2:
                raise ValueError(
                    f"{path}:{line_no}: expected 'old new', got {line!r}"
                )
            pairs.append((fields[0], fields[1]))
    return pairs


# Matches every character that is NOT in the CJK range U+4E00..U+9FA5.
_NON_CJK = re.compile(r'[^\u4e00-\u9fa5]')

working_dir = r"D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\133个县区报告2022\县区研究报告"

# Create the Excel output directory (including missing parents) if needed.
excel_dir = r'D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\133个县区报告2022\Excel'
os.makedirs(excel_dir, exist_ok=True)

# The rule files are loaded lazily, on the first matching report, and reused
# afterwards instead of being re-read for every single document.
blank_keywords = None
replacements = None

# Visit every sub-folder found anywhere below working_dir.
for root, dirs, files in os.walk(working_dir):
    for county_name in dirs:
        # Full path of the county folder.
        county_dir = os.path.join(root, county_name)
        # Look at every entry directly inside the county folder.
        for file in os.listdir(county_dir):
            file_path = os.path.join(county_dir, file)
            # Only Word documents; names starting with '~' are skipped.
            if not file_path.endswith('.docx') or file.startswith('~'):
                continue

            # Keep only the CJK characters of the file name.
            area_name = _NON_CJK.sub('', file)
            if '市' not in area_name and '县' not in area_name and '区' not in area_name:
                continue

            if blank_keywords is None:
                blank_keywords = _read_blank_keywords('replaceBlank.txt')
                replacements = _read_replacements('replaceText.txt')

            # Remove every listed keyword from the name.
            for keyword in blank_keywords:
                area_name = area_name.replace(keyword, '')

            # Apply the "old -> new" substitutions in file order.
            for old, new in replacements:
                area_name = area_name.replace(old, new)

            print(f"正在处理文件:{area_name}")

print("恭喜,所有县区数据整理工作成功完成!")