黄海 8 months ago
commit 8dbc147f39

@ -0,0 +1,51 @@
# pip install pywin32
# https://blog.csdn.net/weixin_42927998/article/details/115086797
import os
import win32com
from win32com.client import Dispatch
# Working directory: the folder whose .docx files are renamed and repaired by this script
workingPath = r'D:\dsWork\YunNanDsBase\Doc\全省及州市县区人口与教育报告集20241023\16个州市报告2022\分析报告20240510'
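# Repair a Word document.
# Repeated testing showed that Apache POI cannot correctly read some of the charts embedded in these Word documents: the chart data sheet should be "Sheet1", but POI does not recognise it and reports a single sheet named "Sheet0", so the data cannot be read.
# Opening the document from Python via win32com.client.Dispatch, reading the sheet, and saving it again repairs the problem.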
def repairWord(docPath):
    """Open a Word document through COM and touch the data sheet of every inline chart.

    For each inline shape that is a chart, the "Sheet1" worksheet of the
    chart's data workbook is looked up through Word's object model. The
    document is then closed and the Word instance is shut down.

    NOTE(review): the document is closed with ``doc.Close()`` and default
    arguments, exactly as before; confirm that this persists the repaired
    chart data in your Word configuration (alerts are disabled).

    Args:
        docPath: Full path of the .docx file to open.
    """
    # Numeric value of WdInlineShapeType.wdInlineShapeChart. It is hard-coded
    # because win32com.client.constants is only populated when the Word type
    # library has been generated (early binding); with a plain Dispatch the
    # attribute lookup can raise AttributeError.
    WD_INLINE_SHAPE_CHART = 12

    docApp = win32com.client.Dispatch('Word.Application')
    try:
        # Keep Word hidden and suppress its dialogs while automating it.
        docApp.Visible = False
        docApp.DisplayAlerts = 0
        doc = docApp.Documents.Open(docPath)
        try:
            for inline_shape in doc.InlineShapes:
                if inline_shape.Type == WD_INLINE_SHAPE_CHART:
                    # Use the iterated shape itself rather than re-fetching it
                    # by a separately maintained index, so the chart that was
                    # type-checked is the one whose sheet is accessed.
                    inline_shape.Chart.ChartData.Workbook.Worksheets("Sheet1")
        finally:
            # Always close the document, even if a chart lookup failed.
            doc.Close()
    finally:
        # Always shut Word down so no hidden WINWORD process is left behind.
        docApp.Quit()
if __name__ == '__main__':
    # Step 1: rename files that carry a doubled ".docx.docx" extension.
    for file in os.listdir(workingPath):
        if file.endswith('.docx.docx'):
            # Full path of the badly named file
            docPath = os.path.join(workingPath, file)
            print("文件名有误,已修复:" + docPath)
            # Strip only the trailing duplicate extension of the file name, so
            # the directory part of the path can never be rewritten.
            fixedPath = os.path.join(workingPath, file[:-len('.docx')])
            os.rename(docPath, fixedPath)
    # Step 2: repair the chart problem in every .docx file.
    for file in os.listdir(workingPath):
        if file.endswith('.docx'):
            # Build the path for the file of THIS iteration; previously the
            # variable left over from step 1 was reused (or was undefined).
            docPath = os.path.join(workingPath, file)
            repairWord(docPath)
    print("修复完成")

@ -7,6 +7,20 @@ docApp = win32com.client.Dispatch('Word.Application')
# 是不是打Word显示
docApp.Visible = False
docApp.DisplayAlerts = 0
<<<<<<< HEAD
doc = docApp.Documents.Open("c:/b.docx")
#
# # 遍历文档中的所有内嵌形状
idx = 1
for inline_shape in doc.InlineShapes:
if inline_shape.Type == win32com.client.constants.wdInlineShapeChart: # 检查是否为内嵌图表
# 获取图表的标题,此项目中图表没有标题
shape = doc.InlineShapes(idx)
sheet = shape.Chart.ChartData.Workbook.Worksheets("Sheet1")
# 下一个图表的索引号
idx = idx + 1
=======
working_dir = r"D:/dsWork/YunNanDsBase/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/"
# doc = docApp.Documents.Open('c:/1.docx')
@ -45,6 +59,7 @@ for inline_shape in doc.InlineShapes:
idx = idx + 1
print(idx-1)
>>>>>>> cfa48d4d9b83ad3f82bc55fd60f8926d113dfe09
# 关闭文档和Word应用
doc.Close()

@ -0,0 +1,56 @@
package com.dsideal.base.Test;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFChart;
import org.openxmlformats.schemas.drawingml.x2006.chart.CTChart;
import org.openxmlformats.schemas.drawingml.x2006.chart.CTSerTx;
import org.openxmlformats.schemas.drawingml.x2006.chart.CTTitle;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
import java.io.FileInputStream;
import java.util.List;
public class Test1 {
public static void main(String[] args) throws Exception {
FileInputStream fis = new FileInputStream("c:\\b.docx");
XWPFDocument document = new XWPFDocument(fis);
// 获取文档中的第一个图表
List<XWPFChart> shapes = document.getCharts();
for (int i = 0; i < shapes.size(); i++) {
XSSFWorkbook workbook = shapes.get(i).getWorkbook();
XSSFSheet sheet = workbook.getSheet(workbook.getSheetName(0));
if(i==21)
{
System.out.println(11111);
}
XSSFRow row = sheet.getRow(0);
System.out.println(row);
System.out.println(row.getCell(0));
System.out.println(i);
}
fis.close();
}
}
Loading…
Cancel
Save