From b785e66487b562b0a3fe90ed87fa26b12d5e436c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E6=B5=B7?= <10402852@qq.com> Date: Tue, 19 Nov 2024 10:39:00 +0800 Subject: [PATCH] 'commit' --- .../Tools/FillData/ExcelKit/ExcelKit.java | 152 ++++++++++++++++++ .../base/Tools/Test/TestOutSideExcel.java | 149 +---------------- 2 files changed, 154 insertions(+), 147 deletions(-) diff --git a/src/main/java/com/dsideal/base/Tools/FillData/ExcelKit/ExcelKit.java b/src/main/java/com/dsideal/base/Tools/FillData/ExcelKit/ExcelKit.java index f31d5177..fdb46e3d 100644 --- a/src/main/java/com/dsideal/base/Tools/FillData/ExcelKit/ExcelKit.java +++ b/src/main/java/com/dsideal/base/Tools/FillData/ExcelKit/ExcelKit.java @@ -11,12 +11,22 @@ import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xwpf.usermodel.XWPFChart; import org.apache.poi.xwpf.usermodel.XWPFDocument; +import org.dom4j.Document; +import org.dom4j.DocumentException; +import org.dom4j.Element; +import org.dom4j.io.SAXReader; import java.io.*; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.zip.ZipEntry; +import java.util.zip.ZipInputStream; public class ExcelKit { + // docx文件解压目录 + public static String DocxUnzipDirectory = "C:\\zipFile\\"; /** * 将xls转换为xlsx @@ -527,4 +537,146 @@ public class ExcelKit { is.close(); } + /** + * 解压缩 + * + * @throws IOException + */ + public static void UnCompress(String wordPath) throws IOException { + File file = new File(wordPath);//取得word文件 + FileInputStream inputStream = new FileInputStream(file); + ZipInputStream zipInputStream = new ZipInputStream(inputStream); + ZipEntry entry; + byte[] ch = new byte[256]; + while ((entry = zipInputStream.getNextEntry()) != null) { + File zFile = new File(DocxUnzipDirectory + entry.getName()); + if (entry.isDirectory()) { + if (!zFile.exists()) { + zFile.mkdirs(); + } + zipInputStream.closeEntry(); + } else { + File fpath = new File(zFile.getParent()); + if (!fpath.exists()) { + fpath.mkdirs(); + } + FileOutputStream outputStream = new FileOutputStream(zFile); + int i; + while ((i = zipInputStream.read(ch)) != -1) { + outputStream.write(ch, 0, i); + } + zipInputStream.closeEntry(); + outputStream.close(); + } + } + inputStream.close(); + } + + + /** + * 读取指定的Chart数据 + * + * @param chartNumber + */ + public static List> readChart(int chartNumber) throws DocumentException { + List> matrix = new ArrayList<>(); + String xml = DocxUnzipDirectory + "word\\charts\\chart" + chartNumber + ".xml"; + if (!(new File(xml).exists())) { + System.out.println("没有找到第" + chartNumber + "个图表"); + return matrix; + } + //3、开始读取 + // 创建 SAXReader 对象,读取 XML 文件 + SAXReader reader = new SAXReader(); + Document document = reader.read(new File(xml)); + // 获取根元素 + Element root = document.getRootElement(); + //折线图 + //将xml用IDEA打开,搜索关键的数据值,然后右键查看XPATH完整路径可以获取到下面的路径 + ///c:chartSpace/c:chart/c:plotArea/c:lineChart/c:ser/c:cat/c:numRef/c:numCache/c:pt/c:v + + //在每个已知的图表类型中查找,找到后跳出循环 + //声明一个数组,图表的所有类型 + String[] CHART_TYPES = {"lineChart", "barChart"};//折线,柱状 + String type = ""; + for (String chartType : CHART_TYPES) { + if (root.element("chart").element("plotArea") + .element(chartType) != null) { + type = chartType; + break; + } + } + if (StrKit.isBlank(type)) { + System.out.println("没有找到图表类型,请扩充图表类型"); + System.out.println(root.element("chart").element("plotArea")); + System.exit(-1); + } + + List xList = root.element("chart").element("plotArea").element(type).element("ser").element("cat") + .element("numRef").element("numCache").elements("pt"); + + List listObject = new ArrayList<>(); + ///c:chartSpace/c:chart/c:plotArea/c:lineChart/c:ser/c:val/c:numRef/c:numCache + for (Element ser : root.element("chart").element("plotArea").element(type).elements("ser")) { + List yList = ser.element("val").element("numRef").element("numCache").elements("pt"); + + //记录都有哪些有效数值和索引号 + Map map = new HashMap<>(); + for (Element e : yList) { + map.put(Integer.parseInt(e.attribute("idx").getValue()), e); + } + List list = new ArrayList<>(); + for (int i = 0; i < xList.size(); i++) { + list.add(map.getOrDefault(i, null)); + } + listObject.add(list); + } + //数据是按行读取的 + for (int i = 0; i < listObject.size(); i++) { + List row = new ArrayList<>(); + List lo = (List) listObject.get(i); + for (Element e : lo) { + if (e == null) { + row.add(null); + } else { + row.add(e.element("v").getText()); + } + } + matrix.add(row); + } + //上面生成的数据格式需要行转列,横坐标是年份,纵坐标是数据 + int rowCount = matrix.size();// 计算行数和列数 + int colCount = matrix.getFirst().size(); + // 创建一个一维列表,用于存储转换后的列 + List> transposed = new ArrayList<>(); + // 遍历每一列 + for (int col = 0; col < colCount; col++) { + // 创建一个新的内部列表,用于存储当前列的所有行 + List column = new ArrayList<>(); + // 遍历每一行,将当前列的值添加到新的内部列表中 + for (int row = 0; row < rowCount; row++) { + String x = matrix.get(row).get(col); + //如果x可以转为小数,那么需要保留两位小数,如果可以转换为整数,那么直接返回整数,如果是字符串,就不变 + try { + double d = Double.parseDouble(x); + if (d == (int) d) { + x = String.valueOf((int) d); + } else { + x = String.format("%.2f", d); + } + } catch (Exception e) { + //do nothing + } + column.add(x); + } + // 将当前列添加到结果列表中 + transposed.add(column); + } + // 在每一行的第一列插入年份 + for (int i = 0; i < transposed.size(); i++) { + List column = transposed.get(i); + column.addFirst(xList.get(i).element("v").getText()); // 在每行的开始插入序号 + } + return transposed; + } } diff --git a/src/main/java/com/dsideal/base/Tools/Test/TestOutSideExcel.java b/src/main/java/com/dsideal/base/Tools/Test/TestOutSideExcel.java index aa8e9444..5ab05d94 100644 --- a/src/main/java/com/dsideal/base/Tools/Test/TestOutSideExcel.java +++ b/src/main/java/com/dsideal/base/Tools/Test/TestOutSideExcel.java @@ -1,173 +1,28 @@ package com.dsideal.base.Tools.Test; import com.dsideal.base.Tools.FillData.ExcelKit.ExcelKit; -import com.jfinal.kit.StrKit; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; -import org.dom4j.Document; import org.dom4j.DocumentException; -import org.dom4j.Element; -import org.dom4j.io.SAXReader; import org.xml.sax.SAXException; import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPathExpressionException; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; import java.util.List; -import java.util.Map; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; public class TestOutSideExcel { - /** - * 解压缩 - * - * @throws IOException - */ - public static void UnCompress(String wordPath, String workingPath) throws IOException { - workingPath = workingPath.replace("\\", "/"); - if (!workingPath.endsWith("/")) workingPath += "/"; - File file = new File(wordPath);//取得word文件 - FileInputStream inputStream = new FileInputStream(file); - ZipInputStream zipInputStream = new ZipInputStream(inputStream); - ZipEntry entry; - byte[] ch = new byte[256]; - while ((entry = zipInputStream.getNextEntry()) != null) { - File zFile = new File(workingPath + entry.getName()); - if (entry.isDirectory()) { - if (!zFile.exists()) { - zFile.mkdirs(); - } - zipInputStream.closeEntry(); - } else { - File fpath = new File(zFile.getParent()); - if (!fpath.exists()) { - fpath.mkdirs(); - } - FileOutputStream outputStream = new FileOutputStream(zFile); - int i; - while ((i = zipInputStream.read(ch)) != -1) { - outputStream.write(ch, 0, i); - } - zipInputStream.closeEntry(); - outputStream.close(); - } - } - inputStream.close(); - } - - - /** - * 读取指定的Chart数据 - * - * @param workingPath - * @param chartNumber - */ - public static List> readChart(String workingPath, int chartNumber) throws DocumentException { - List> matrix = new ArrayList<>(); - String xml = workingPath + "\\word\\charts\\chart" + chartNumber + ".xml"; - if (!(new File(xml).exists())) { - System.out.println("没有找到第" + chartNumber + "个图表"); - return matrix; - } - //3、开始读取 - // 创建 SAXReader 对象,读取 XML 文件 - SAXReader reader = new SAXReader(); - Document document = reader.read(new File(xml)); - // 获取根元素 - Element root = document.getRootElement(); - //折线图 - //将xml用IDEA打开,搜索关键的数据值,然后右键查看XPATH完整路径可以获取到下面的路径 - ///c:chartSpace/c:chart/c:plotArea/c:lineChart/c:ser/c:cat/c:numRef/c:numCache/c:pt/c:v - - //在每个已知的图表类型中查找,找到后跳出循环 - //声明一个数组,图表的所有类型 - String[] CHART_TYPES = {"lineChart", "barChart"};//折线,柱状 - String type = ""; - for (String chartType : CHART_TYPES) { - if (root.element("chart").element("plotArea") - .element(chartType) != null) { - type = chartType; - break; - } - } - if (StrKit.isBlank(type)) { - System.out.println("没有找到图表类型,请扩充图表类型"); - System.out.println(root.element("chart").element("plotArea")); - System.exit(-1); - } - List xList = root.element("chart").element("plotArea").element(type).element("ser").element("cat") - .element("numRef").element("numCache").elements("pt"); - - List listObject = new ArrayList<>(); - ///c:chartSpace/c:chart/c:plotArea/c:lineChart/c:ser/c:val/c:numRef/c:numCache - for (Element ser : root.element("chart").element("plotArea").element(type).elements("ser")) { - List yList = ser.element("val").element("numRef").element("numCache").elements("pt"); - - //记录都有哪些有效数值和索引号 - Map map = new HashMap<>(); - for (Element e : yList) { - map.put(Integer.parseInt(e.attribute("idx").getValue()), e); - } - List list = new ArrayList<>(); - for (int i = 0; i < xList.size(); i++) { - list.add(map.getOrDefault(i, null)); - } - listObject.add(list); - } - //数据是按行读取的 - for (int i = 0; i < listObject.size(); i++) { - List row = new ArrayList<>(); - List lo = (List) listObject.get(i); - for (Element e : lo) { - if (e == null) { - row.add(null); - } else { - row.add(e.element("v").getText()); - } - } - matrix.add(row); - } - //上面生成的数据格式需要行转列,横坐标是年份,纵坐标是数据 - int rowCount = matrix.size();// 计算行数和列数 - int colCount = matrix.getFirst().size(); - // 创建一个一维列表,用于存储转换后的列 - List> transposed = new ArrayList<>(); - // 遍历每一列 - for (int col = 0; col < colCount; col++) { - // 创建一个新的内部列表,用于存储当前列的所有行 - List column = new ArrayList<>(); - // 遍历每一行,将当前列的值添加到新的内部列表中 - for (int row = 0; row < rowCount; row++) { - column.add(matrix.get(row).get(col)); - } - // 将当前列添加到结果列表中 - transposed.add(column); - } - // 在每一行的第一列插入年份 - for (int i = 0; i < transposed.size(); i++) { - List column = transposed.get(i); - column.addFirst(xList.get(i).element("v").getText()); // 在每行的开始插入序号 - } - return transposed; - } public static void main(String[] args) throws IOException, InvalidFormatException, InterruptedException, ParserConfigurationException, SAXException, XPathExpressionException, DocumentException { String sourceDoc = "c:/西双版纳州人口变化及其对教育的影响20240420.docx"; //1、将word文件解压缩 - String workingPath = "C:\\zipFile"; - UnCompress(sourceDoc, workingPath); + ExcelKit.UnCompress(sourceDoc); //2、我们需要第几个图表 for (int chartNumber = 1; chartNumber <= 37; chartNumber++) { System.out.println("正在处理第" + chartNumber + "个图表的信息~"); //读取图表 - List> list = readChart(workingPath, chartNumber); + List> list = ExcelKit.readChart( chartNumber); ExcelKit.printTable(list); System.out.println("========================================================="); }