main
黄海 8 months ago
parent 73054a5932
commit 9c9e4da43e

@ -28,43 +28,6 @@ public class C9 {
// Path of the sample Excel workbook (absolute path on a local D: drive).
static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\市\\【9】总人口变化及预测-双\\总人口变化及预测-双.xlsx";
/**
 * Reads the data rows behind one chart embedded in a Word document.
 *
 * <p>The document is opened with POI, its charts are ordered by
 * {@code ExcelKit.getSortListForXWPFChart}, and the workbook of the chart at
 * {@code chartNumber} is read through {@code ExcelKit.readSheet}. When the
 * number of rows read plus {@code skipRowCount} is below {@code expectLimit},
 * the POI result is treated as incomplete: the method waits, runs the
 * Python helpers ({@code ExcelKit.callPythonPrepare} / {@code ExcelKit.callPythonRead})
 * and re-reads the rows from the file at {@code ExcelKit.excelPath}.
 *
 * @param docPath      path of the Word document to open
 * @param chartNumber  index passed to {@code List.get} on the sorted chart list
 * @param skipRowCount number of rows handed to {@code readSheet} as rows to skip
 * @param expectLimit  minimum value of (rows read + skipRowCount) for the POI result to be accepted
 * @return the rows read, each row as a list of cell strings
 * @throws IOException            if the document cannot be opened, read or closed
 * @throws InvalidFormatException if the chart's embedded workbook cannot be opened
 * @throws InterruptedException   if the thread is interrupted while waiting before the Python fallback
 */
public static List<List<String>> getChartData(String docPath, int chartNumber, int skipRowCount, int expectLimit) throws IOException, InvalidFormatException, InterruptedException {
    List<List<String>> data;
    // try-with-resources: the stream and the document are released even when
    // opening the chart or reading the sheet throws, and both are closed
    // before the Python fallback touches the same file.
    try (InputStream is = new FileInputStream(docPath)) {
        // Static POI setting; a negative ratio effectively turns off the zip-bomb inflate-ratio check.
        ZipSecureFile.setMinInflateRatio(-1.0d);
        try (XWPFDocument doc = new XWPFDocument(is)) {
            // Charts after sorting
            List<XWPFChart> charts = ExcelKit.getSortListForXWPFChart(doc.getCharts());
            XSSFWorkbook workbook = charts.get(chartNumber).getWorkbook();
            data = ExcelKit.readSheet(workbook, skipRowCount);
        }
    }

    // If the expected number of rows was reached, return the list obtained through POI directly.
    int totalRow = data.size() + skipRowCount;
    if (totalRow < expectLimit) {
        System.out.println("期望数量>=" + expectLimit + ",现在只有" + totalRow + "条理解为POI读取WORD图表存在问题使用python进行二次获取数据...");
        // Leave enough time for COM to close Word, otherwise exceptions occur.
        Thread.sleep(3000);
        // Otherwise fetch the data again through python + COM; that result may be the correct one.
        // Write the interaction text file.
        ExcelKit.callPythonPrepare(docPath, chartNumber);
        // Read the chart.
        ExcelKit.callPythonRead();
        // Read the generated Excel file; POI is sufficient for that.
        data = ExcelKit.readSheet(ExcelKit.excelPath, skipRowCount);
        System.out.println("二次获取数据条目数量:" + data.size() + ",期望数量=" + expectLimit);
    }
    return data;
}
public static void main(String[] args) throws IOException, InvalidFormatException, InterruptedException {
//初始化数据库连接
@ -104,14 +67,17 @@ public class C9 {
System.out.println("正在处理" + cityName + "市州文件...");
//数据在图表1
int chartNumber = 1;
List<List<String>> source = getChartData(file.getAbsolutePath(), chartNumber, 6, 20);//2017年开始
//chartNumber:第几个图表
//skipRowCount:跳过的行数,2017年开始,第一行是表头第二行开始是2022所以填写了跳过6行
//expectLimit:期望的数据行数,首先用POI进行解析如果获取的行数大于预期行数就是正确的否则就需要二次调用python进行读取
List<List<String>> source = ExcelKit.getChartData(file.getAbsolutePath(), chartNumber, 6, 20);
//遍历source1
for (List<String> r : source) {
Row outRow = outSheet.createRow(++rowIndex);
// 导出数据
//上级行政区划,行政区划,年份,总人口变化,总人口预测
int year = Integer.parseInt(r.getFirst());
int year = Integer.parseInt(r.getFirst().substring(0, 4));
double value = Double.parseDouble(r.get(1));
if (year <= 2023) {

@ -2,16 +2,20 @@ package com.dsideal.base.Tools.FillData.ExcelKit;
import com.dsideal.base.DataEase.Model.ExcelReader;
import com.jfinal.kit.StrKit;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFFont;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.usermodel.XWPFChart;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class ExcelKit {
@ -380,4 +384,104 @@ public class ExcelKit {
writer.flush();
writer.close();
}
/**
 * Reads the first sheet of an .xlsx workbook into a list of rows, with every
 * cell converted to a string.
 *
 * <p>Cell conversion: string cells keep their text; numeric cells become
 * {@code Double.toString} of the value (so a whole number reads like "2017.0"),
 * or {@code Date.toString} when the cell is date-formatted; boolean cells
 * become "true"/"false"; formula cells yield the formula text, not its
 * evaluated result; every other cell type yields an empty string.
 *
 * <p>This is a best-effort read: any exception is printed and the rows
 * collected so far are returned (an empty list when the file cannot be opened).
 *
 * @param excelFilePath path of the .xlsx file to read
 * @param skipRows      number of leading rows, as yielded by the sheet iterator, to discard
 * @return the remaining rows, each as a list of cell strings; never {@code null}
 */
public static List<List<String>> readExcelToList(String excelFilePath, int skipRows) {
    List<List<String>> data = new ArrayList<>();
    // Both resources are managed by try-with-resources so the workbook is
    // closed even when reading a cell throws (it previously leaked in that case).
    try (FileInputStream inputStream = new FileInputStream(excelFilePath);
         Workbook workbook = new XSSFWorkbook(inputStream)) {
        Sheet sheet = workbook.getSheetAt(0); // first sheet

        // Discard the requested number of leading rows.
        Iterator<Row> rowIterator = sheet.iterator();
        for (int skipped = 0; skipped < skipRows && rowIterator.hasNext(); skipped++) {
            rowIterator.next();
        }

        // Walk the remaining rows.
        while (rowIterator.hasNext()) {
            Row row = rowIterator.next();
            List<String> rowData = new ArrayList<>();
            // Walk the cells of the row.
            Iterator<Cell> cellIterator = row.cellIterator();
            while (cellIterator.hasNext()) {
                Cell cell = cellIterator.next();
                // Convert according to the cell type.
                switch (cell.getCellType()) {
                    case STRING:
                        rowData.add(cell.getStringCellValue());
                        break;
                    case NUMERIC:
                        if (DateUtil.isCellDateFormatted(cell)) {
                            rowData.add(cell.getDateCellValue().toString());
                        } else {
                            rowData.add(Double.toString(cell.getNumericCellValue()));
                        }
                        break;
                    case BOOLEAN:
                        rowData.add(Boolean.toString(cell.getBooleanCellValue()));
                        break;
                    case FORMULA:
                        rowData.add(cell.getCellFormula());
                        break;
                    default:
                        rowData.add("");
                        break;
                }
            }
            data.add(rowData);
        }
    } catch (Exception e) {
        // Deliberate best-effort: report the failure and return what was read.
        e.printStackTrace();
    }
    return data;
}
/**
 * Reads the data rows behind one chart embedded in a Word document.
 *
 * <p>The document is opened with POI, its charts are ordered by
 * {@code getSortListForXWPFChart}, and the workbook of the chart at
 * {@code chartNumber} is read through {@code readSheet}. When the number of
 * rows read plus {@code skipRowCount} is below {@code expectLimit}, the POI
 * result is treated as incomplete: the method waits, runs the Python helpers
 * ({@code callPythonPrepare} / {@code callPythonRead}) and re-reads the rows
 * from the file at {@code ExcelKit.excelPath} via {@code readExcelToList}.
 *
 * @param docPath      path of the Word document to open
 * @param chartNumber  index passed to {@code List.get} on the sorted chart list
 * @param skipRowCount number of rows handed to {@code readSheet} as rows to skip;
 *                     not applied on the Python fallback path, which skips one row
 * @param expectLimit  minimum value of (rows read + skipRowCount) for the POI result to be accepted
 * @return the rows read, each row as a list of cell strings
 * @throws IOException            if the document cannot be opened, read or closed
 * @throws InvalidFormatException if the chart's embedded workbook cannot be opened
 * @throws InterruptedException   if the thread is interrupted while waiting before the Python fallback
 */
public static List<List<String>> getChartData(String docPath, int chartNumber, int skipRowCount, int expectLimit) throws IOException, InvalidFormatException, InterruptedException {
    List<List<String>> data;
    // try-with-resources: the stream and the document are released even when
    // opening the chart or reading the sheet throws, and both are closed
    // before the Python fallback touches the same file.
    try (InputStream is = new FileInputStream(docPath)) {
        // Static POI setting; a negative ratio effectively turns off the zip-bomb inflate-ratio check.
        ZipSecureFile.setMinInflateRatio(-1.0d);
        try (XWPFDocument doc = new XWPFDocument(is)) {
            // Charts after sorting
            List<XWPFChart> charts = ExcelKit.getSortListForXWPFChart(doc.getCharts());
            XSSFWorkbook workbook = charts.get(chartNumber).getWorkbook();
            data = ExcelKit.readSheet(workbook, skipRowCount);
        }
    }

    // If the expected number of rows was reached, return the list obtained through POI directly.
    int totalRow = data.size() + skipRowCount;
    if (totalRow < expectLimit) {
        System.out.println("期望数量>=" + expectLimit + ",现在只有" + totalRow + "条理解为POI读取WORD图表存在问题使用python进行二次获取数据...");
        // Leave enough time for COM to close Word, otherwise exceptions occur.
        Thread.sleep(4000);
        // Otherwise fetch the data again through python + COM; that result may be the correct one.
        // Write the interaction text file.
        ExcelKit.callPythonPrepare(docPath, chartNumber);
        // Read the chart.
        ExcelKit.callPythonRead();
        // Read the generated Excel file with POI.
        // NOTE(review): only the first row is skipped here, not skipRowCount, so this
        // path can return rows the POI path would have skipped — confirm this is intended.
        int skipRows = 1;
        data = ExcelKit.readExcelToList(ExcelKit.excelPath, skipRows);
        System.out.println("二次获取数据条目数量:" + data.size() + ",期望数量=" + expectLimit);
    }
    return data;
}
}

Loading…
Cancel
Save