main
黄海 8 months ago
parent e93de481e2
commit 93fcab72a9

@ -0,0 +1,51 @@
# pip install pywin32
# https://blog.csdn.net/weixin_42927998/article/details/115086797
import os
import win32com
from win32com.client import Dispatch
# Working directory: the folder whose .docx files are scanned and repaired by the main script below
workingPath = r'D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\16个州市报告2022\分析报告20240510'
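# Repair a Word document.
# Repeated testing showed that Apache POI cannot correctly read some charts embedded in
# Word documents: the chart's data sheet is really named "Sheet1", but POI does not
# recognise it and reports only a single "Sheet0", so the chart data cannot be read.
# Opening the document through python + win32com.client.Dispatch, reading each chart's
# sheet and then saving the document fixes the problem.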
def repairWord(docPath):
    """Open a document in Word, touch every embedded chart's "Sheet1", and save it.

    Round-tripping the document through Word this way is the workaround the
    author found for charts whose embedded workbook another reader could not
    open under the name "Sheet1".

    Args:
        docPath: Full path of the .docx file to repair. It is passed unchanged
            to ``Documents.Open``.

    Raises:
        Whatever COM error Word raises (for example when the file cannot be
        opened or a chart has no "Sheet1"). Word is still shut down in that
        case and the document is closed without saving.
    """
    # WdInlineShapeType.wdInlineShapeChart. win32com.client.constants is only
    # populated when a generated (makepy) wrapper for Word has been loaded, so
    # fall back to the documented numeric value for late-bound Dispatch.
    chartType = getattr(win32com.client.constants, 'wdInlineShapeChart', 12)
    # WdSaveOptions values for Document.Close.
    wdSaveChanges = -1
    wdDoNotSaveChanges = 0

    docApp = win32com.client.Dispatch('Word.Application')
    doc = None
    try:
        # Keep Word hidden and suppress its dialogs.
        docApp.Visible = False
        docApp.DisplayAlerts = 0
        doc = docApp.Documents.Open(docPath)

        # Use the shape handed out by the iteration itself; re-indexing
        # doc.InlineShapes with a separate counter can drift from the loop
        # position when the document also contains non-chart inline shapes.
        for inline_shape in doc.InlineShapes:
            if inline_shape.Type == chartType:
                # Accessing the sheet is the whole point; the value is not used.
                inline_shape.Chart.ChartData.Workbook.Worksheets("Sheet1")

        # Save explicitly instead of relying on what Close() decides to do
        # while alerts are suppressed.
        doc.Close(wdSaveChanges)
        doc = None
    finally:
        # On failure, discard changes so a half-processed file is not written.
        if doc is not None:
            doc.Close(wdDoNotSaveChanges)
        # Always quit, otherwise a hidden Word process stays alive.
        docApp.Quit()
if __name__ == '__main__':
    # Repair every .docx file found directly inside the working directory.
    for file in os.listdir(workingPath):
        if not file.endswith('.docx'):
            continue
        # Word's "~$name.docx" owner/lock files are not real documents.
        if file.startswith('~$'):
            continue

        # Full path of the document.
        docPath = os.path.join(workingPath, file)

        # A name that ends in ".docx.docx" is renamed so it ends in ".docx".
        if file.endswith('.docx.docx'):
            # Strip only the trailing duplicate extension from the file name;
            # a blanket str.replace would also touch the rest of the path.
            fixedPath = os.path.join(workingPath, file[:-len('.docx')])
            # Rename from the ORIGINAL path to the corrected one, and only then
            # switch docPath over, so the file handed to Word actually exists.
            os.rename(docPath, fixedPath)
            docPath = fixedPath
            print("文件名有误,已修复:" + docPath)

        print("正在修复文档:" + file)
        # Run the document through Word.
        repairWord(docPath)
        print("修复完成")

@ -8,28 +8,15 @@ docApp = win32com.client.Dispatch('Word.Application')
docApp.Visible = False
docApp.DisplayAlerts = 0
doc = docApp.Documents.Open("c:/昭通市.docx")
index_tubiao = 2
# 遍历文档中的所有内嵌形状
doc = docApp.Documents.Open("c:/b.docx")
#
# # 遍历文档中的所有内嵌形状
idx = 1
for inline_shape in doc.InlineShapes:
if inline_shape.Type == win32com.client.constants.wdInlineShapeChart: # 检查是否为内嵌图表
if idx == index_tubiao:
shape = doc.InlineShapes(idx)
# 获取图表的标题,此项目中图表没有标题
sheet = shape.Chart.ChartData.Workbook.Worksheets("Sheet1")
# 行数
row_size = sheet.UsedRange.rows.Count
# 列数
col_size = sheet.UsedRange.columns.Count
# 遍历获取表格中的数据
for i in range(1, row_size + 1):
for j in range(1, col_size + 1):
print(sheet.Cells(i, j).Value, end=" ")
print("")
print("")
# 获取图表的标题,此项目中图表没有标题
shape = doc.InlineShapes(idx)
sheet = shape.Chart.ChartData.Workbook.Worksheets("Sheet1")
# 下一个图表的索引号
idx = idx + 1

@ -25,19 +25,20 @@ public class C9_Test {
LocalMysqlConnectUtil.Init();
//实例化
ReadDocxUtil ru = new ReadDocxUtil();
//判断是否为docx文件
//读取文件
String inputUrl = "c:/昭通市.docx";
String inputUrl = "c:/b.docx";
InputStream is = new FileInputStream(inputUrl);
ZipSecureFile.setMinInflateRatio(-1.0d);
XWPFDocument doc = new XWPFDocument(is);
//排序后的图表
List<XWPFChart> charts = ExcelKit.getSortListForXWPFChart(doc.getCharts());
XSSFWorkbook workbook = charts.get(3).getWorkbook();
List<List<String>> source1 = ExcelKit.readSheet(workbook, 1);
System.out.println(source1);
for (int i = 0; i < charts.size(); i++) {
XSSFWorkbook workbook = charts.get(i).getWorkbook();
List<List<String>> source1 = ExcelKit.readSheet(workbook, 1);
System.out.println(source1);
}
}
}

Loading…
Cancel
Save