You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

50 lines
2.3 KiB

import os
import win32com
from win32com.client import Dispatch
import re
# pip install pywin32 openpyxl
# pip install pywin32
working_dir = r"D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\133个县区报告2022\县区研究报告"
import openpyxl
# 在工作目录下创建Excel目录
excel_dir = r'D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\133个县区报告2022\Excel'
if not os.path.exists(excel_dir):
os.mkdir(excel_dir)
# 遍历working_dir目录下的所有子文件夹
for root, dirs, files in os.walk(working_dir):
for dir in dirs:
# 获取县区名称
county_name = dir
# 获取县区文件夹路径
county_dir = os.path.join(root, dir)
# 遍历县区文件夹下的所有文件
for file in os.listdir(county_dir):
# 获取文件路径
file_path = os.path.join(county_dir, file)
# 判断文件是否是Word文档
if file_path.endswith('.docx') and not file.startswith('~'):
areaName = file
areaName = re.sub(r'[^\u4e00-\u9fa5]', '', areaName)
if '' not in areaName and '' not in areaName and '' not in areaName:
continue
# 打开文件文件,按行读取
with open('replaceBlank.txt', 'r', encoding='utf-8') as f:
for line in f:
# 去除每行前后的空白字符,包括空格、制表符和换行符
line = line.strip()
# 将文本中的关键字替换为空字符串
areaName = areaName.replace(line, '')
# 打开文件文件,按行读取
with open('replaceText.txt', 'r', encoding='utf-8') as f:
for line in f:
# 去除每行前后的空白字符,包括空格、制表符和换行符
line = line.strip()
# 将文本中的关键字替换为空字符串
areaName = areaName.replace(line.split(' ')[0], line.split(' ')[1])
#
print(f"正在处理文件:{areaName}")
print("恭喜,所有县区数据整理工作成功完成!")