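# [web-view residue, not part of the script] You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
#
# [web-view residue] 53 lines
# [web-view residue] 1.9 KiB
#
# [web-view residue] 9 months ago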
# pip install pywin32
# https://blog.csdn.net/weixin_42927998/article/details/115086797
import os
import win32com
from win32com.client import Dispatch
# Working directory: the folder whose .docx files the script below lists and processes
workingPath = r'D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\16个州市报告2022\分析报告20240510'
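# Repair a Word document.
# Repeated testing showed that some charts embedded in Word documents cannot be read correctly by POI: the worksheet should be "Sheet1", but POI does not recognise it and reports only a single "Sheet0", so the chart data cannot be read.
# Reading the chart through python + win32com.client.Dispatch and simply saving the document fixes this problem, which is rather surprising.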
def repairWord(docPath):
    """Open a Word document through COM and touch the data sheet of every inline chart.

    Word is started hidden with alerts suppressed. For each inline shape whose
    type is ``wdInlineShapeChart`` the chart's data workbook is accessed, its
    worksheet named "Sheet1" is looked up and the sheet name is printed. The
    document is then closed and Word is quit.

    Args:
        docPath: Path of the .docx file to open.

    Raises:
        Any COM error raised by Word propagates to the caller (for example
        when a chart workbook has no worksheet called "Sheet1"); the document
        and the Word application are still closed in that case.
    """
    docApp = win32com.client.Dispatch('Word.Application')
    try:
        # Keep the Word window hidden and suppress its dialogs
        docApp.Visible = False
        docApp.DisplayAlerts = 0
        doc = docApp.Documents.Open(docPath)
        try:
            # Walk every inline shape in the document
            for inline_shape in doc.InlineShapes:
                # Only embedded charts are of interest
                # NOTE(review): win32com.client.constants is presumably only
                # populated once makepy type-library support exists for Word
                # -- confirm on the target machine.
                if inline_shape.Type != win32com.client.constants.wdInlineShapeChart:
                    continue
                # Use the iterated shape directly instead of re-fetching it
                # through a hand-maintained 1-based index.
                sheet = inline_shape.Chart.ChartData.Workbook.Worksheets("Sheet1")
                print(sheet.Name)
        finally:
            # Always close the document, even when a chart lookup fails
            doc.Close()
    finally:
        # Always shut Word down so no hidden WINWORD process is left behind
        docApp.Quit()
if __name__ == '__main__':
    # Step 1: strip the duplicated extension from files named "*.docx.docx"
    for file in os.listdir(workingPath):
        if file.endswith('.docx.docx'):
            # Full path of the misnamed file
            docPath = os.path.join(workingPath, file)
            print("文件名有误,已修复:" + docPath)
            # Trim only the trailing extension of the file name; a str.replace
            # over the whole path could also rewrite a matching directory name.
            fixedPath = os.path.join(workingPath, file[:-len('.docx')])
            os.rename(docPath, fixedPath)
    # Step 2: run the chart repair on every .docx in the working directory.
    # The directory is listed again so the files renamed above are picked up
    # under their new names.
    for file in os.listdir(workingPath):
        # Files starting with "~$" are Word's owner/lock files, not documents
        if file.endswith('.docx') and not file.startswith('~$'):
            # Build the path from the file being visited; the earlier code
            # reused the stale path left over from the rename loop.
            repairWord(os.path.join(workingPath, file))
    print("修复完成")