You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

52 lines
2.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# pip install pywin32
# https://blog.csdn.net/weixin_42927998/article/details/115086797
import os
import win32com
from win32com.client import Dispatch
# Working directory: the folder whose .docx files are scanned by the script entry point.
workingPath = r'D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\16个州市报告2022\分析报告20240510'
# Repair a Word document.
# Repeated testing showed that Apache POI cannot read some charts embedded in
# Word documents correctly: the chart workbook sheet should be "Sheet1", but POI
# does not recognise it and reports only a "Sheet0", so the data cannot be read.
# Opening the chart data through python + win32com.client.Dispatch and then
# closing the document was observed to fix this.
def repairWord(docPath):
    """Open a Word document via COM and touch the workbook of every inline chart.

    Args:
        docPath: Full path of the .docx file to open in Word.

    Raises:
        Whatever COM error Word reports (e.g. the file cannot be opened, or a
        chart has no worksheet named "Sheet1"). The document and the Word
        instance are closed before the error propagates.

    NOTE(review): no explicit ``doc.Save()`` is issued; persistence relies on
    what ``doc.Close()`` does by default with alerts disabled -- confirm.
    """
    docApp = win32com.client.Dispatch('Word.Application')
    try:
        # Keep Word hidden and suppress its dialogs.
        docApp.Visible = False
        docApp.DisplayAlerts = 0

        # win32com.client.constants is only populated once a generated type
        # library wrapper has been loaded; with plain Dispatch the lookup can
        # raise AttributeError, so fall back to the documented enum value
        # (WdInlineShapeType.wdInlineShapeChart == 12).
        chart_type = getattr(win32com.client.constants, 'wdInlineShapeChart', 12)

        doc = docApp.Documents.Open(docPath)
        try:
            # Walk all inline shapes and use the loop item itself; a separate
            # counter drifts from the real position as soon as a non-chart
            # inline shape (e.g. a picture) appears before a chart.
            for inline_shape in doc.InlineShapes:
                if inline_shape.Type == chart_type:  # inline chart only
                    # Accessing the sheet is the whole point: it makes Word
                    # load the chart's embedded workbook.
                    inline_shape.Chart.ChartData.Workbook.Worksheets("Sheet1")
        finally:
            # Always release the document, even if a chart access failed.
            doc.Close()
    finally:
        # Always shut Word down so no hidden WINWORD process is left behind.
        docApp.Quit()
if __name__ == '__main__':
    # Script entry point: repair every .docx file found directly inside
    # workingPath (sub-directories are not visited).
    for file in os.listdir(workingPath):
        if not file.endswith('.docx'):
            continue
        # Word creates "~$name.docx" owner/lock files next to open documents;
        # they are not real documents and cannot be opened.
        if file.startswith('~$'):
            continue

        # Full path of the document.
        docPath = os.path.join(workingPath, file)

        # A file wrongly named "*.docx.docx" is renamed to "*.docx" first.
        # The rename must go from the ORIGINAL path to the corrected one, and
        # only the trailing suffix is stripped so that a ".docx.docx" appearing
        # elsewhere in the path is left alone.
        if docPath.endswith('.docx.docx'):
            fixedPath = docPath[:-len('.docx')]
            os.rename(docPath, fixedPath)
            docPath = fixedPath
            print("文件名有误,已修复:" + docPath)

        print("正在修复文档:" + file)
        # Run the repair on the (possibly renamed) document.
        repairWord(docPath)
    print("修复完成")