main
黄海 8 months ago
parent 5160afefb0
commit 0720752da2

@ -1,11 +1,9 @@
# pip install pywin32
# https://blog.csdn.net/weixin_42927998/article/details/115086797
import os
import win32com
from win32com.client import constants, Dispatch
from openpyxl import Workbook
from win32com.client import constants
if __name__ == '__main__':
# 文件路径

@ -28,7 +28,41 @@ public class C9 {
//示例Excel
static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\市\\【9】总人口变化及预测-双\\总人口变化及预测-双.xlsx";
public static void main(String[] args) throws IOException, InvalidFormatException {
/**
 * Reads the data rows behind one chart embedded in a Word (.docx) document.
 * <p>
 * The chart's embedded workbook is read with POI first. If that read yields fewer than
 * {@code expectLimit} rows, the chart is read a second time through the external Python
 * script ({@code ExcelKit.callPythonPrepare} followed by {@code ExcelKit.callPythonRead}),
 * and the rows are then taken from the Excel file at {@code ExcelKit.excelPath}.
 *
 * @param docPath      path of the .docx file to open
 * @param chartNumber  index used directly with {@code List.get} on the chart list returned by
 *                     {@code ExcelKit.getSortListForXWPFChart}; it is also forwarded unchanged
 *                     to {@code ExcelKit.callPythonPrepare}
 * @param skipRowCount number of leading rows to skip, forwarded to {@code ExcelKit.readSheet}
 * @param expectLimit  minimum number of rows the POI read must return to be accepted as-is
 * @return the rows read by POI, or the rows re-read via the Python fallback
 * @throws IOException            if the document, the embedded workbook or the fallback Excel
 *                                file cannot be read
 * @throws InvalidFormatException if the chart's embedded workbook cannot be opened
 * @throws InterruptedException   if waiting for the Python process is interrupted
 */
public static List<List<String>> getChartData(String docPath, int chartNumber, int skipRowCount, int expectLimit) throws IOException, InvalidFormatException, InterruptedException {
    // NOTE(review): a negative ratio appears to relax POI's zip-bomb inflate-ratio check so that
    // highly compressed embedded parts can be opened - confirm against the POI documentation.
    ZipSecureFile.setMinInflateRatio(-1.0d);

    List<List<String>> data;
    // try-with-resources guarantees the stream, the document and the embedded workbook are
    // released even when reading fails part-way through.
    try (InputStream is = new FileInputStream(docPath);
         XWPFDocument doc = new XWPFDocument(is)) {
        // Charts in sorted order
        List<XWPFChart> charts = ExcelKit.getSortListForXWPFChart(doc.getCharts());
        try (XSSFWorkbook workbook = charts.get(chartNumber).getWorkbook()) {
            data = ExcelKit.readSheet(workbook, skipRowCount);
        }
    }

    // If POI returned the expected number of rows, its result is used directly.
    if (data.size() < expectLimit) {
        System.out.println("数据不足,重新获取数据,现正在使用python_docx进行二次获取数据...");
        // Otherwise fall back to Python + COM, whose result may be the correct one.
        // Write the task hand-over text file
        ExcelKit.callPythonPrepare(docPath, chartNumber);
        // Let the script read the chart
        ExcelKit.callPythonRead();
        // Read the generated Excel file; POI is sufficient for that
        data = ExcelKit.readSheet(ExcelKit.excelPath, skipRowCount);
    }
    return data;
}
public static void main(String[] args) throws IOException, InvalidFormatException, InterruptedException {
//初始化数据库连接
LocalMysqlConnectUtil.Init();
//实例化
@ -65,25 +99,16 @@ public class C9 {
if (fileName.endsWith(".docx") && !fileName.startsWith("~")) {
System.out.println("正在处理" + cityName + "市州文件...");
//读取文件
String inputUrl = file.getAbsolutePath();
InputStream is = new FileInputStream(inputUrl);
ZipSecureFile.setMinInflateRatio(-1.0d);
XWPFDocument doc = new XWPFDocument(is);
//排序后的图表
List<XWPFChart> charts = ExcelKit.getSortListForXWPFChart(doc.getCharts());
//数据在图表1
int firstChartNumber = 1;
List<List<String>> source1 = getChartData(file.getAbsolutePath(), firstChartNumber, 6,20);//2017年开始
XSSFWorkbook workbook = charts.get(firstChartNumber).getWorkbook();
// if(cityName.contains("丽江")){
// System.out.println("丽江");
// }
List<List<String>> source1 = ExcelKit.readSheet(workbook, 6);//从2017年开始
System.out.println(source1);
if (cityName.contains("丽江")) {
System.out.println("丽江");
System.out.println(source1);
}
//遍历source1
for (List<String> r : source1) {

@ -16,6 +16,13 @@ import java.util.List;
public class ExcelKit {
//与python交互使用的excel文件路径
public static String excelPath = "c:/task.xlsx";
//执行的python路径这里我使用的是anaconda3的python路径自行修改,注意要在这个环境中pip安装了python-docx否则会报错
public static String python = "D:\\anaconda3\\envs\\py310\\python.exe";
//python脚本路径
public static String py = "D:\\dsWork\\YunNanDsBase\\Py\\TuBiao.py";
/**
* xlsxlsx
*
@ -143,7 +150,7 @@ public class ExcelKit {
// 遍历工作表中的所有行
if (sheet == null) return array;
System.out.println("Sheet rows="+sheet.getPhysicalNumberOfRows());
System.out.println("Sheet rows=" + sheet.getPhysicalNumberOfRows());
for (Row row : sheet) {
rowIndex++;
if (rowIndex <= skipRowCount) continue;//跳过指定的行数
@ -163,7 +170,6 @@ public class ExcelKit {
}
array.add(X);
}
workbook.close();
return array;
}
@ -333,6 +339,7 @@ public class ExcelKit {
}
return --rlt;
}
/**
* Runs the Python + COM script that reads the chart data from the Word document.
*
@ -340,9 +347,9 @@ public class ExcelKit {
* @throws InterruptedException
*/
public static void callPythonRead() throws IOException, InterruptedException {
ExcelKit.delExcel(excelPath);
// 创建ProcessBuilder对象并设置Python脚本的路径
String python = "D:\\anaconda3\\envs\\py310\\python.exe";
String py = "D:\\dsWork\\YunNanDsBase\\Py\\TuBiao.py";
ProcessBuilder processBuilder = new ProcessBuilder(python, py);
// 重定向错误流到标准输出这样可以在Java中捕获所有的输出
processBuilder.redirectErrorStream(true);
@ -358,7 +365,7 @@ public class ExcelKit {
process.waitFor();
}
public static void callPythonPrepare(String docPath,int tuBiaoNum) throws IOException {
public static void callPythonPrepare(String docPath, int tuBiaoNum) throws IOException {
String taskTxt = "c:/task.txt";
//如果文件存在则删除
if (new File(taskTxt).exists()) {

@ -0,0 +1,71 @@
package com.dsideal.base.Tools.FillData.Test;
import cn.hutool.core.io.FileUtil;
import com.dsideal.base.Tools.FillData.ExcelKit.ExcelKit;
import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil;
import com.dsideal.base.Tools.Util.ReadDocxUtil;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.usermodel.XWPFChart;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Ad-hoc scan over the city/prefecture Word reports under {@link #parentPath}.
 * <p>
 * For every .docx file found, the document is opened and the embedded workbook of chart 36 is
 * loaded. NOTE(review): judging by the class name, the purpose is presumably to find reports
 * whose embedded Excel data cannot be opened - confirm with the author.
 */
public class TestBadExcel {
    // Root folder of the city/prefecture Word reports to scan
    static String parentPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\16个州市报告2022\\分析报告20240510";
    // Sample Excel file
    static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\市\\【11】教育资源配置发展预测\\教育资源配置发展预测(人).xlsx";

    /**
     * Walks all files under {@link #parentPath} and loads the workbook of chart 36 from each
     * .docx report.
     *
     * @param args unused
     * @throws IOException            if a report or its embedded workbook cannot be read
     * @throws InvalidFormatException if an embedded workbook cannot be opened
     */
    public static void main(String[] args) throws IOException, InvalidFormatException {
        // Initialise the database connection
        LocalMysqlConnectUtil.Init();
        // Helper used to derive the city/area name from a file name
        ReadDocxUtil ru = new ReadDocxUtil();
        // Collect every file below parentPath
        List<File> files = FileUtil.loopFiles(parentPath, file -> true);
        if (files != null) {
            for (File file : files) {
                // Directories are skipped
                if (file.isDirectory()) continue;
                // City name
                String cityName = ru.getCityOrAreaName(file.getName());
                String fileName = file.getName();
                // Only .docx files whose name does not start with "~" are processed
                if (fileName.endsWith(".docx") && !fileName.startsWith("~")) {
                    System.out.println("正在处理" + cityName + "市州文件...");
                    ZipSecureFile.setMinInflateRatio(-1.0d);
                    // Close the stream and the document after each report so that file handles
                    // do not accumulate while the whole folder is scanned.
                    try (InputStream is = new FileInputStream(file.getAbsolutePath());
                         XWPFDocument doc = new XWPFDocument(is)) {
                        // Charts in sorted order
                        List<XWPFChart> charts = ExcelKit.getSortListForXWPFChart(doc.getCharts());
                        // The data lives in chart 36 (total teaching staff)
                        int firstChartNumber = 36;
                        // Convenient spot for a debugger breakpoint on one specific prefecture
                        if (cityName.contains("西双版纳州")) {
                            System.out.println("he");
                        }
                        // Loading the embedded workbook is the actual check; the result is not used
                        charts.get(firstChartNumber - 1).getWorkbook();
                    }
                }
            }
        }
    }
}
Loading…
Cancel
Save