You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

52 lines
1.9 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# pip install pywin32
# https://blog.csdn.net/weixin_42927998/article/details/115086797
import os
import win32com
from win32com.client import Dispatch
# 工作目录
workingPath = r'D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\16个州市报告2022\分析报告20240510'
# 修复Word文档
# 经过反复测试发现WORD文档中的图表有些POI是无法正确读取的本来是Sheet1,结果它不认识说只有一个Sheet0,此时就无法正确读取数据了。
# 而我通过python+win32com.client.Dispatch可以读取到直接保存就修复了这个BUG真是太神奇了
def repairWord(docPath):
docApp = win32com.client.Dispatch('Word.Application')
# 是不是打Word显示
docApp.Visible = False
docApp.DisplayAlerts = 0
doc = docApp.Documents.Open(docPath)
#
# # 遍历文档中的所有内嵌形状
idx = 1
for inline_shape in doc.InlineShapes:
if inline_shape.Type == win32com.client.constants.wdInlineShapeChart: # 检查是否为内嵌图表
shape = doc.InlineShapes(idx)
sheet = shape.Chart.ChartData.Workbook.Worksheets("Sheet1")
# 下一个图表的索引号
idx = idx + 1
# 关闭文档和Word应用
doc.Close()
docApp.Quit()
if __name__ == '__main__':
# 1、修复两层扩展名.docx
for file in os.listdir(workingPath):
if file.endswith('.docx.docx'):
# 完整的路径名称
docPath = os.path.join(workingPath, file)
print("文件名有误,已修复:" + docPath)
os.rename(docPath, docPath.replace('.docx.docx', '.docx'))
# 2、修复图表异常问题
for file in os.listdir(workingPath):
if file.endswith('.docx'):
# 开始修复文档
repairWord(docPath)
print("修复完成")