main
黄海 8 months ago
parent a7f413a28f
commit b785e66487

@ -11,12 +11,22 @@ import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.usermodel.XWPFChart;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
public class ExcelKit {
// Directory into which the .docx archive is extracted; UnCompress writes below it and readChart reads from it.
public static String DocxUnzipDirectory = "C:\\zipFile\\";
/**
* xlsxlsx
@ -527,4 +537,146 @@ public class ExcelKit {
is.close();
}
/**
 * Extracts every entry of the given .docx (ZIP) file below {@code DocxUnzipDirectory}.
 *
 * <p>Directory entries are created as directories. File entries are written to disk,
 * creating missing parent directories first and overwriting files that already exist.
 *
 * @param wordPath path of the .docx file to extract
 * @throws IOException if the archive cannot be read, a target file cannot be written, or an
 *                     entry name contains a {@code ..} path segment
 */
public static void UnCompress(String wordPath) throws IOException {
    // try-with-resources closes both streams even when extraction fails half-way.
    try (FileInputStream inputStream = new FileInputStream(new File(wordPath));
         ZipInputStream zipInputStream = new ZipInputStream(inputStream)) {
        ZipEntry entry;
        while ((entry = zipInputStream.getNextEntry()) != null) {
            String entryName = entry.getName();
            // Entry names come from the archive itself; a ".." segment would let a crafted
            // file write outside the extraction directory ("Zip Slip"), so refuse it.
            for (String segment : entryName.split("[/\\\\]")) {
                if ("..".equals(segment)) {
                    throw new IOException("Refusing to extract zip entry with parent-directory segment: " + entryName);
                }
            }

            File zFile = new File(DocxUnzipDirectory + entryName);
            if (entry.isDirectory()) {
                zFile.mkdirs();
            } else {
                File parentDir = zFile.getParentFile();
                if (parentDir != null) {
                    parentDir.mkdirs();
                }
                try (FileOutputStream outputStream = new FileOutputStream(zFile)) {
                    // ZipInputStream reports end-of-stream at the end of the current entry,
                    // so this copies exactly one entry.
                    zipInputStream.transferTo(outputStream);
                }
            }
            zipInputStream.closeEntry();
        }
    }
}
/**
 * Reads the cached data of one chart from the extracted .docx and returns it as a table.
 *
 * <p>The chart XML is looked up at {@code DocxUnzipDirectory + "word/charts/chartN.xml"}.
 * Only {@code lineChart} and {@code barChart} plot areas are recognised. Each returned row
 * belongs to one category point: the first cell is the category text taken from the first
 * series, followed by one cell per series holding that series' value with the same point
 * index ({@code null} when the series has no point at that index). Values that parse as
 * numbers are rendered without decimals when they are whole numbers and with two decimals
 * otherwise; any other text is kept unchanged.
 *
 * @param chartNumber number N of the chart file {@code chartN.xml}
 * @return one row per category point; an empty list when the chart file does not exist
 * @throws DocumentException if the XML cannot be parsed, or if the plot area contains none
 *                           of the supported chart types
 */
public static List<List<String>> readChart(int chartNumber) throws DocumentException {
    // Forward slashes are accepted by java.io.File on Windows too, so the path is portable.
    File xmlFile = new File(DocxUnzipDirectory + "word/charts/chart" + chartNumber + ".xml");
    if (!xmlFile.exists()) {
        System.out.println("没有找到第" + chartNumber + "个图表");
        return new ArrayList<>();
    }

    // NOTE(review): SAXReader is used with its default configuration; if documents may come
    // from untrusted sources, consider disabling DTDs / external entities — confirm.
    Document document = new SAXReader().read(xmlFile);
    Element plotArea = document.getRootElement().element("chart").element("plotArea");

    // Pick the first supported chart type present in the plot area (line, then bar).
    Element chart = null;
    for (String chartType : new String[]{"lineChart", "barChart"}) {
        chart = plotArea.element(chartType);
        if (chart != null) {
            break;
        }
    }
    if (chart == null) {
        // A library method must not terminate the JVM; report the problem to the caller.
        throw new DocumentException("没有找到图表类型,请扩充图表类型: " + xmlFile);
    }

    // Category points: /c:chartSpace/c:chart/c:plotArea/c:<type>/c:ser/c:cat/c:numRef/c:numCache/c:pt
    List<Element> categoryPoints = chart.element("ser").element("cat")
            .element("numRef").element("numCache").elements("pt");

    // Value points per series, keyed by their idx attribute, because a series may skip indices:
    // /c:chartSpace/c:chart/c:plotArea/c:<type>/c:ser/c:val/c:numRef/c:numCache/c:pt
    List<Map<Integer, Element>> seriesPoints = new ArrayList<>();
    for (Element ser : chart.elements("ser")) {
        Map<Integer, Element> pointsByIdx = new HashMap<>();
        for (Element pt : ser.element("val").element("numRef").element("numCache").elements("pt")) {
            pointsByIdx.put(Integer.parseInt(pt.attribute("idx").getValue()), pt);
        }
        seriesPoints.add(pointsByIdx);
    }

    // Build the result row by row: category first, then one value per series.
    List<List<String>> table = new ArrayList<>(categoryPoints.size());
    for (int i = 0; i < categoryPoints.size(); i++) {
        List<String> row = new ArrayList<>(seriesPoints.size() + 1);
        row.add(categoryPoints.get(i).element("v").getText());
        for (Map<Integer, Element> pointsByIdx : seriesPoints) {
            Element pt = pointsByIdx.get(i);
            row.add(pt == null ? null : formatChartCellValue(pt.element("v").getText()));
        }
        table.add(row);
    }
    return table;
}

/**
 * Formats a chart value: whole numbers without decimals, other numbers with two decimals,
 * non-numeric text (and {@code null}) unchanged.
 */
private static String formatChartCellValue(String text) {
    if (text == null) {
        return null;
    }
    double d;
    try {
        d = Double.parseDouble(text);
    } catch (NumberFormatException ignored) {
        // Not a number: keep the original text.
        return text;
    }
    // Compare via long rather than int so whole numbers above Integer.MAX_VALUE are still
    // printed as integers instead of falling through to the two-decimal format.
    if (d == (long) d) {
        return String.valueOf((long) d);
    }
    return String.format("%.2f", d);
}
}

@ -1,173 +1,28 @@
package com.dsideal.base.Tools.Test;
import com.dsideal.base.Tools.FillData.ExcelKit.ExcelKit;
import com.jfinal.kit.StrKit;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathExpressionException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
public class TestOutSideExcel {
/**
 * Extracts every entry of the given .docx (ZIP) file below the given working directory.
 *
 * <p>Backslashes in {@code workingPath} are replaced by forward slashes and a trailing slash
 * is appended when missing. Directory entries are created as directories. File entries are
 * written to disk, creating missing parent directories first and overwriting existing files.
 *
 * @param wordPath    path of the .docx file to extract
 * @param workingPath directory that receives the extracted files
 * @throws IOException if the archive cannot be read, a target file cannot be written, or an
 *                     entry name contains a {@code ..} path segment
 */
public static void UnCompress(String wordPath, String workingPath) throws IOException {
    workingPath = workingPath.replace("\\", "/");
    if (!workingPath.endsWith("/")) {
        workingPath += "/";
    }

    // try-with-resources closes both streams even when extraction fails half-way.
    try (FileInputStream inputStream = new FileInputStream(new File(wordPath));
         ZipInputStream zipInputStream = new ZipInputStream(inputStream)) {
        ZipEntry entry;
        while ((entry = zipInputStream.getNextEntry()) != null) {
            String entryName = entry.getName();
            // Entry names come from the archive itself; a ".." segment would let a crafted
            // file write outside the working directory ("Zip Slip"), so refuse it.
            for (String segment : entryName.split("[/\\\\]")) {
                if ("..".equals(segment)) {
                    throw new IOException("Refusing to extract zip entry with parent-directory segment: " + entryName);
                }
            }

            File zFile = new File(workingPath + entryName);
            if (entry.isDirectory()) {
                zFile.mkdirs();
            } else {
                File parentDir = zFile.getParentFile();
                if (parentDir != null) {
                    parentDir.mkdirs();
                }
                try (FileOutputStream outputStream = new FileOutputStream(zFile)) {
                    // ZipInputStream reports end-of-stream at the end of the current entry,
                    // so this copies exactly one entry.
                    zipInputStream.transferTo(outputStream);
                }
            }
            zipInputStream.closeEntry();
        }
    }
}
/**
 * Reads the cached data of one chart from an extracted .docx and returns it as a table.
 *
 * <p>The chart XML is looked up at {@code workingPath + "/word/charts/chartN.xml"}. Only
 * {@code lineChart} and {@code barChart} plot areas are recognised. Each returned row belongs
 * to one category point: the first cell is the category text taken from the first series,
 * followed by one cell per series holding the raw text of that series' value with the same
 * point index ({@code null} when the series has no point at that index).
 *
 * @param workingPath directory into which the .docx was extracted
 * @param chartNumber number N of the chart file {@code chartN.xml}
 * @return one row per category point; an empty list when the chart file does not exist
 * @throws DocumentException if the XML cannot be parsed, or if the plot area contains none
 *                           of the supported chart types
 */
public static List<List<String>> readChart(String workingPath, int chartNumber) throws DocumentException {
    // Forward slashes are accepted by java.io.File on Windows too, so the path is portable.
    File xmlFile = new File(workingPath + "/word/charts/chart" + chartNumber + ".xml");
    if (!xmlFile.exists()) {
        System.out.println("没有找到第" + chartNumber + "个图表");
        return new ArrayList<>();
    }

    Document document = new SAXReader().read(xmlFile);
    Element plotArea = document.getRootElement().element("chart").element("plotArea");

    // Pick the first supported chart type present in the plot area (line, then bar).
    Element chart = null;
    for (String chartType : new String[]{"lineChart", "barChart"}) {
        chart = plotArea.element(chartType);
        if (chart != null) {
            break;
        }
    }
    if (chart == null) {
        // Report the unsupported chart to the caller instead of terminating the JVM.
        throw new DocumentException("没有找到图表类型,请扩充图表类型: " + xmlFile);
    }

    // Category points: /c:chartSpace/c:chart/c:plotArea/c:<type>/c:ser/c:cat/c:numRef/c:numCache/c:pt
    List<Element> categoryPoints = chart.element("ser").element("cat")
            .element("numRef").element("numCache").elements("pt");

    // Value points per series, keyed by their idx attribute, because a series may skip indices:
    // /c:chartSpace/c:chart/c:plotArea/c:<type>/c:ser/c:val/c:numRef/c:numCache/c:pt
    List<Map<Integer, Element>> seriesPoints = new ArrayList<>();
    for (Element ser : chart.elements("ser")) {
        Map<Integer, Element> pointsByIdx = new HashMap<>();
        for (Element pt : ser.element("val").element("numRef").element("numCache").elements("pt")) {
            pointsByIdx.put(Integer.parseInt(pt.attribute("idx").getValue()), pt);
        }
        seriesPoints.add(pointsByIdx);
    }

    // Build the result row by row: category first, then one value per series.
    List<List<String>> table = new ArrayList<>(categoryPoints.size());
    for (int i = 0; i < categoryPoints.size(); i++) {
        List<String> row = new ArrayList<>(seriesPoints.size() + 1);
        row.add(categoryPoints.get(i).element("v").getText());
        for (Map<Integer, Element> pointsByIdx : seriesPoints) {
            Element pt = pointsByIdx.get(i);
            row.add(pt == null ? null : pt.element("v").getText());
        }
        table.add(row);
    }
    return table;
}
public static void main(String[] args) throws IOException, InvalidFormatException, InterruptedException, ParserConfigurationException, SAXException, XPathExpressionException, DocumentException {
String sourceDoc = "c:/西双版纳州人口变化及其对教育的影响20240420.docx";
//1、将word文件解压缩
String workingPath = "C:\\zipFile";
UnCompress(sourceDoc, workingPath);
ExcelKit.UnCompress(sourceDoc);
//2、我们需要第几个图表
for (int chartNumber = 1; chartNumber <= 37; chartNumber++) {
System.out.println("正在处理第" + chartNumber + "个图表的信息~");
//读取图表
List<List<String>> list = readChart(workingPath, chartNumber);
List<List<String>> list = ExcelKit.readChart( chartNumber);
ExcelKit.printTable(list);
System.out.println("=========================================================");
}

Loading…
Cancel
Save