package com.dsideal.base.Tools.Test;

import com.dsideal.base.Tools.FillData.ExcelKit.ExcelKit;
import com.jfinal.kit.StrKit;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.xml.sax.SAXException;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

/**
 * Ad-hoc test driver: unzips a .docx file and dumps the cached data of the
 * charts embedded in it ({@code word/charts/chartN.xml}) as tables.
 */
public class TestOutSideExcel {

    /** Chart element names that {@link #readChart} knows how to read (line chart, bar chart). */
    private static final String[] CHART_TYPES = {"lineChart", "barChart"};

    /**
     * Reference/cache element pairs that may hold the cached points of an axis.
     * Numeric caches are tried first, then string caches.
     */
    private static final String[][] POINT_CACHE_PATHS = {
            {"numRef", "numCache"},
            {"strRef", "strCache"},
    };

    /**
     * Extracts every entry of a zip archive (e.g. a .docx file) into a directory.
     *
     * @param wordPath    path of the archive to extract
     * @param workingPath directory the entries are written to; created on demand
     * @throws IOException if the archive cannot be read, an entry cannot be written,
     *                     or an entry name would resolve outside {@code workingPath}
     */
    public static void UnCompress(String wordPath, String workingPath) throws IOException {
        workingPath = workingPath.replace("\\", "/");
        if (!workingPath.endsWith("/")) {
            workingPath += "/";
        }
        Path targetRoot = new File(workingPath).toPath().toAbsolutePath().normalize();

        // try-with-resources closes the zip stream (and the underlying file) even on failure.
        try (ZipInputStream zipInputStream = new ZipInputStream(new FileInputStream(new File(wordPath)))) {
            ZipEntry entry;
            while ((entry = zipInputStream.getNextEntry()) != null) {
                File zFile = new File(workingPath + entry.getName());

                // Reject entries such as "../../x" that would be written outside the target directory.
                Path target = zFile.toPath().toAbsolutePath().normalize();
                if (!target.startsWith(targetRoot)) {
                    throw new IOException("Zip entry escapes target directory: " + entry.getName());
                }

                if (entry.isDirectory()) {
                    if (!zFile.exists()) {
                        zFile.mkdirs();
                    }
                    continue;
                }

                File parentDir = zFile.getParentFile();
                if (parentDir != null && !parentDir.exists()) {
                    parentDir.mkdirs();
                }
                try (FileOutputStream outputStream = new FileOutputStream(zFile)) {
                    // Reading a ZipInputStream stops at the end of the current entry.
                    zipInputStream.transferTo(outputStream);
                }
            }
        }
    }

    /**
     * Reads the cached data of one chart of an unzipped .docx file.
     *
     * <p>The category points are taken from the first series. Each returned row
     * corresponds to one category point: its first cell is the category text and
     * the following cells are the values of every series at that position
     * ({@code null} where a series has no point with that index).
     *
     * @param workingPath directory the .docx file was extracted to
     * @param chartNumber number of the chart, as used in the {@code chartN.xml} file name
     * @return one row per category point; empty if the chart file does not exist
     *         or the chart has no series or no cached category points
     * @throws DocumentException if the chart XML cannot be parsed
     */
    public static List<List<String>> readChart(String workingPath, int chartNumber) throws DocumentException {
        // Built with File(parent, child) and '/' so the lookup also works outside Windows.
        File chartFile = new File(workingPath, "word/charts/chart" + chartNumber + ".xml");
        if (!chartFile.exists()) {
            System.out.println("没有找到第" + chartNumber + "个图表");
            return new ArrayList<>();
        }

        // NOTE(review): the reader uses its default configuration; if the document can come
        // from an untrusted source, consider disabling DTDs/external entities on it.
        SAXReader reader = new SAXReader();
        Document document = reader.read(chartFile);
        Element root = document.getRootElement();

        // Expected layout (example for a line chart):
        // /c:chartSpace/c:chart/c:plotArea/c:lineChart/c:ser/c:cat/c:numRef/c:numCache/c:pt/c:v
        Element plotArea = root.element("chart").element("plotArea");

        // Use the first known chart type present in the plot area.
        String type = "";
        for (String chartType : CHART_TYPES) {
            if (plotArea.element(chartType) != null) {
                type = chartType;
                break;
            }
        }
        if (StrKit.isBlank(type)) {
            System.out.println("没有找到图表类型,请扩充图表类型");
            System.out.println(plotArea);
            System.exit(-1);
        }

        Element chartElement = plotArea.element(type);
        Element firstSeries = chartElement.element("ser");
        if (firstSeries == null) {
            return new ArrayList<>();
        }
        List<Element> xList = cachedPoints(firstSeries.element("cat"));
        if (xList.isEmpty()) {
            return new ArrayList<>();
        }

        // One row per series, aligned to the category positions.
        // /c:chartSpace/c:chart/c:plotArea/c:lineChart/c:ser/c:val/c:numRef/c:numCache
        List<List<String>> matrix = new ArrayList<>();
        for (Element ser : chartElement.elements("ser")) {
            // Index the points by their idx attribute: a series may omit some positions.
            Map<Integer, Element> pointByIdx = new HashMap<>();
            for (Element pt : cachedPoints(ser.element("val"))) {
                pointByIdx.put(Integer.parseInt(pt.attribute("idx").getValue()), pt);
            }
            List<String> row = new ArrayList<>(xList.size());
            for (int i = 0; i < xList.size(); i++) {
                Element pt = pointByIdx.get(i);
                row.add(pt == null ? null : pt.element("v").getText());
            }
            matrix.add(row);
        }

        // Transpose: one output row per category point, category text first, then each series value.
        List<List<String>> transposed = new ArrayList<>(xList.size());
        for (int col = 0; col < xList.size(); col++) {
            List<String> line = new ArrayList<>(matrix.size() + 1);
            line.add(xList.get(col).element("v").getText());
            for (List<String> row : matrix) {
                line.add(row.get(col));
            }
            transposed.add(line);
        }
        return transposed;
    }

    /**
     * Returns the cached {@code pt} elements below a {@code cat} or {@code val} element.
     *
     * @param axisData the {@code cat}/{@code val} element; may be {@code null}
     * @return the points of the first cache found, or an empty list if there is none
     */
    private static List<Element> cachedPoints(Element axisData) {
        if (axisData == null) {
            return List.of();
        }
        for (String[] path : POINT_CACHE_PATHS) {
            Element ref = axisData.element(path[0]);
            if (ref == null) {
                continue;
            }
            Element cache = ref.element(path[1]);
            if (cache != null) {
                return cache.elements("pt");
            }
        }
        return List.of();
    }

    /**
     * Unzips a hard-coded .docx file and prints the data of its charts 1 to 37.
     */
    public static void main(String[] args) throws IOException, InvalidFormatException, InterruptedException,
            ParserConfigurationException, SAXException, XPathExpressionException, DocumentException {
        String sourceDoc = "c:/西双版纳州人口变化及其对教育的影响20240420.docx";

        // 1. Unzip the Word file.
        String workingPath = "C:\\zipFile";
        UnCompress(sourceDoc, workingPath);

        // 2. Read and print every chart.
        for (int chartNumber = 1; chartNumber <= 37; chartNumber++) {
            System.out.println("正在处理第" + chartNumber + "个图表的信息~");
            List<List<String>> list = readChart(workingPath, chartNumber);
            ExcelKit.printTable(list);
            System.out.println("=========================================================");
        }
    }
}