You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.
# pip install pywin32
# https://blog.csdn.net/weixin_42927998/article/details/115086797
import os
import win32com
from win32com . client import Dispatch
# Working directory that holds the .docx reports to be repaired.
# Kept as a raw string so the Windows backslashes are taken literally.
workingPath = r'D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\16个州市报告2022\分析报告20240510'
# Repair a Word document.
# Original author's note: after repeated testing, some charts embedded in
# Word documents cannot be read correctly by POI -- the chart workbook's
# sheet should be "Sheet1", but POI reports only a "Sheet0" and the data
# cannot be read. Opening the document through win32com and saving it again
# was observed to fix this.
def repairWord(docPath):
    """Open *docPath* in Word, touch every embedded chart's data sheet, and save.

    For each inline shape that is a chart, the "Sheet1" worksheet of the
    chart's data workbook is looked up and its name is printed. The document
    is then saved and closed, and the Word instance is shut down.

    Args:
        docPath: Path of the .docx file to open (passed to Documents.Open).

    Raises:
        Any COM error raised by Word is propagated; the document and the
        Word application are still closed before the error leaves this
        function.
    """
    # Value of WdInlineShapeType.wdInlineShapeChart. Hard-coded because
    # win32com.client.constants is only populated when makepy-generated
    # wrappers exist, which a plain Dispatch() call does not guarantee.
    wd_inline_shape_chart = 12

    docApp = win32com.client.Dispatch('Word.Application')
    try:
        # Keep Word hidden and suppress its dialogs while running unattended.
        docApp.Visible = False
        docApp.DisplayAlerts = 0
        doc = docApp.Documents.Open(docPath)
        try:
            # Walk every inline shape and look only at embedded charts.
            for inline_shape in doc.InlineShapes:
                if inline_shape.Type != wd_inline_shape_chart:
                    continue
                sheet = inline_shape.Chart.ChartData.Workbook.Worksheets("Sheet1")
                print(sheet.Name)
            # Save explicitly: per the note above, re-saving is what repairs
            # the file, so do not rely on Word's implicit behaviour on Close.
            doc.Save()
        finally:
            # Always release the document, even if a chart lookup failed.
            doc.Close()
    finally:
        # Always shut Word down so no hidden WINWORD process is left behind.
        docApp.Quit()
# Script entry point: first normalise doubled extensions, then repair every
# .docx file found in the working directory.
if __name__ == '__main__':
    docx_ext = '.docx'
    doubled_ext = docx_ext + docx_ext

    # Step 1: fix file names that carry the extension twice (".docx.docx").
    for file in os.listdir(workingPath):
        if file.endswith(doubled_ext):
            # Full path of the misnamed file.
            docPath = os.path.join(workingPath, file)
            print(" 文件名有误,已修复: " + docPath)
            # Drop only the trailing duplicate extension, so a ".docx.docx"
            # that happens to appear elsewhere in the path is left alone.
            os.rename(docPath, docPath[:-len(docx_ext)])

    # Step 2: repair the chart problem in every .docx file.
    # The directory is listed again so the names renamed above are picked up.
    for file in os.listdir(workingPath):
        if file.endswith(docx_ext):
            # Build the path for THIS file; reusing the variable left over
            # from step 1 would pass a stale (or undefined) path.
            docPath = os.path.join(workingPath, file)
            repairWord(docPath)

    print(" 修复完成 ")