diff --git a/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/~$市人口变化及其对教育的影响20240419.docx b/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/~$市人口变化及其对教育的影响20240419.docx new file mode 100644 index 00000000..5988520c Binary files /dev/null and b/Doc/全省及州市县区人口与教育报告集20241023/16个州市报告2022/分析报告20240510/~$市人口变化及其对教育的影响20240419.docx differ diff --git a/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人).xlsx b/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人).xlsx index 58266e80..08ce71f4 100644 Binary files a/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人).xlsx and b/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人).xlsx differ diff --git a/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人)【成果】.xlsx b/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人)【成果】.xlsx deleted file mode 100644 index 3d8b7493..00000000 Binary files a/Doc/待处理/市/【11】教育资源配置发展预测/教育资源配置发展预测(人)【成果】.xlsx and /dev/null differ diff --git a/Doc/待处理/市/【9】总人口变化及预测-双/总人口变化及预测-双.xlsx b/Doc/待处理/市/【9】总人口变化及预测-双/总人口变化及预测-双.xlsx index 2478595b..efff7f65 100644 Binary files a/Doc/待处理/市/【9】总人口变化及预测-双/总人口变化及预测-双.xlsx and b/Doc/待处理/市/【9】总人口变化及预测-双/总人口变化及预测-双.xlsx differ diff --git a/Doc/待处理/市/【9】总人口变化及预测-双/总人口变化及预测-双【成果】.xlsx b/Doc/待处理/市/【9】总人口变化及预测-双/总人口变化及预测-双【成果】.xlsx deleted file mode 100644 index fc5ab116..00000000 Binary files a/Doc/待处理/市/【9】总人口变化及预测-双/总人口变化及预测-双【成果】.xlsx and /dev/null differ diff --git a/Py/TuBiao.py b/Py/TuBiao.py index 7f859307..da5d0ac7 100644 --- a/Py/TuBiao.py +++ b/Py/TuBiao.py @@ -1,11 +1,9 @@ # pip install pywin32 # https://blog.csdn.net/weixin_42927998/article/details/115086797 -import os import win32com -from win32com.client import constants, Dispatch - from openpyxl import Workbook +from win32com.client import constants if __name__ == '__main__': # 文件路径 diff --git a/src/main/java/com/dsideal/base/Tools/FillData/City/C9.java b/src/main/java/com/dsideal/base/Tools/FillData/City/C9.java index a7ae21ba..fdeac35f 100644 --- a/src/main/java/com/dsideal/base/Tools/FillData/City/C9.java +++ b/src/main/java/com/dsideal/base/Tools/FillData/City/C9.java @@ -28,7 +28,41 @@ public class C9 { //示例Excel static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\市\\【9】总人口变化及预测-双\\总人口变化及预测-双.xlsx"; - public static void main(String[] args) throws IOException, InvalidFormatException { + /** + * 获取指定文档中指定图表数据 + * + * @param docPath 文档路径 + * @param chartNumber 图表序号 + * @param skipRowCount 跳过的行数 + * @return 结果数据 + * @throws IOException + * @throws InvalidFormatException + */ + public static List> getChartData(String docPath, int chartNumber, int skipRowCount, int expectLimit) throws IOException, InvalidFormatException, InterruptedException { + InputStream is = new FileInputStream(docPath); + ZipSecureFile.setMinInflateRatio(-1.0d); + XWPFDocument doc = new XWPFDocument(is); + //排序后的图表 + List charts = ExcelKit.getSortListForXWPFChart(doc.getCharts()); + XSSFWorkbook workbook = charts.get(chartNumber).getWorkbook(); + List> data = ExcelKit.readSheet(workbook, skipRowCount); + workbook.close(); + + //如果达到目标预期的数量,就直接返回poi获取的数据列表 + if (data.size() < expectLimit) { + System.out.println("数据不足,重新获取数据,现正在使用python_docx进行二次获取数据..."); + //否则调用python+com进行再次获取数据列表,这次获取的可能才是对的 + //写入交互文本文件 + ExcelKit.callPythonPrepare(docPath, chartNumber); + //对图表进行读取 + ExcelKit.callPythonRead(); + //读取生成的EXCEL,使用POI就可以了 + data = ExcelKit.readSheet(ExcelKit.excelPath, skipRowCount); + } + return data; + } + + public static void main(String[] args) throws IOException, InvalidFormatException, InterruptedException { //初始化数据库连接 LocalMysqlConnectUtil.Init(); //实例化 @@ -65,25 +99,16 @@ public class C9 { if (fileName.endsWith(".docx") && !fileName.startsWith("~")) { System.out.println("正在处理" + cityName + "市州文件..."); //读取文件 - String inputUrl = file.getAbsolutePath(); - InputStream is = new FileInputStream(inputUrl); - ZipSecureFile.setMinInflateRatio(-1.0d); - XWPFDocument doc = new XWPFDocument(is); - //排序后的图表 - List charts = ExcelKit.getSortListForXWPFChart(doc.getCharts()); + //数据在图表1 int firstChartNumber = 1; + List> source1 = getChartData(file.getAbsolutePath(), firstChartNumber, 6,20);//2017年开始 - XSSFWorkbook workbook = charts.get(firstChartNumber).getWorkbook(); -// if(cityName.contains("丽江")){ -// System.out.println("丽江"); -// } - List> source1 = ExcelKit.readSheet(workbook, 6);//从2017年开始 - - System.out.println(source1); - - + if (cityName.contains("丽江")) { + System.out.println("丽江"); + System.out.println(source1); + } //遍历source1 for (List r : source1) { diff --git a/src/main/java/com/dsideal/base/Tools/FillData/ExcelKit/ExcelKit.java b/src/main/java/com/dsideal/base/Tools/FillData/ExcelKit/ExcelKit.java index 0addd728..b914cf39 100644 --- a/src/main/java/com/dsideal/base/Tools/FillData/ExcelKit/ExcelKit.java +++ b/src/main/java/com/dsideal/base/Tools/FillData/ExcelKit/ExcelKit.java @@ -16,6 +16,13 @@ import java.util.List; public class ExcelKit { + //与python交互使用的excel文件路径 + public static String excelPath = "c:/task.xlsx"; + //执行的python路径,这里我使用的是anaconda3的python,路径自行修改,注意要在这个环境中pip安装了python-docx,否则会报错 + public static String python = "D:\\anaconda3\\envs\\py310\\python.exe"; + //python脚本路径 + public static String py = "D:\\dsWork\\YunNanDsBase\\Py\\TuBiao.py"; + /** * 将xls转换为xlsx * @@ -143,7 +150,7 @@ public class ExcelKit { // 遍历工作表中的所有行 if (sheet == null) return array; - System.out.println("Sheet rows="+sheet.getPhysicalNumberOfRows()); + System.out.println("Sheet rows=" + sheet.getPhysicalNumberOfRows()); for (Row row : sheet) { rowIndex++; if (rowIndex <= skipRowCount) continue;//跳过指定的行数 @@ -163,7 +170,6 @@ public class ExcelKit { } array.add(X); } - workbook.close(); return array; } @@ -333,6 +339,7 @@ public class ExcelKit { } return --rlt; } + /** * 调用python+com读取WORD中的图表 * @@ -340,9 +347,9 @@ public class ExcelKit { * @throws InterruptedException */ public static void callPythonRead() throws IOException, InterruptedException { + + ExcelKit.delExcel(excelPath); // 创建ProcessBuilder对象,并设置Python脚本的路径 - String python = "D:\\anaconda3\\envs\\py310\\python.exe"; - String py = "D:\\dsWork\\YunNanDsBase\\Py\\TuBiao.py"; ProcessBuilder processBuilder = new ProcessBuilder(python, py); // 重定向错误流到标准输出,这样可以在Java中捕获所有的输出 processBuilder.redirectErrorStream(true); @@ -358,7 +365,7 @@ public class ExcelKit { process.waitFor(); } - public static void callPythonPrepare(String docPath,int tuBiaoNum) throws IOException { + public static void callPythonPrepare(String docPath, int tuBiaoNum) throws IOException { String taskTxt = "c:/task.txt"; //如果文件存在则删除 if (new File(taskTxt).exists()) { diff --git a/src/main/java/com/dsideal/base/Tools/FillData/Test/TestBadExcel.java b/src/main/java/com/dsideal/base/Tools/FillData/Test/TestBadExcel.java new file mode 100644 index 00000000..dbaf102b --- /dev/null +++ b/src/main/java/com/dsideal/base/Tools/FillData/Test/TestBadExcel.java @@ -0,0 +1,71 @@ +package com.dsideal.base.Tools.FillData.Test; + +import cn.hutool.core.io.FileUtil; +import com.dsideal.base.Tools.FillData.ExcelKit.ExcelKit; +import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil; +import com.dsideal.base.Tools.Util.ReadDocxUtil; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.util.ZipSecureFile; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.xssf.usermodel.XSSFCellStyle; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.apache.poi.xwpf.usermodel.XWPFChart; +import org.apache.poi.xwpf.usermodel.XWPFDocument; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class TestBadExcel { + //开始读取市州word文档 + static String parentPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\16个州市报告2022\\分析报告20240510"; + + //示例Excel + static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\市\\【11】教育资源配置发展预测\\教育资源配置发展预测(人).xlsx"; + + public static void main(String[] args) throws IOException, InvalidFormatException { + //初始化数据库连接 + LocalMysqlConnectUtil.Init(); + //实例化 + ReadDocxUtil ru = new ReadDocxUtil(); + + + //找到parentPath下一级目录中所有文件 + List files = FileUtil.loopFiles(parentPath, file -> true); + int rowIndex = 0; + //处理这个目录 + if (files != null) { + for (File file : files) { + //判断file是不是目录,是目录的需要跳过 + if (file.isDirectory()) continue; + //城市名称 + String cityName = ru.getCityOrAreaName(file.getName()); + String fileName = file.getName(); + + //判断是否为docx文件 + if (fileName.endsWith(".docx") && !fileName.startsWith("~")) { + System.out.println("正在处理" + cityName + "市州文件..."); + //读取文件 + String inputUrl = file.getAbsolutePath(); + InputStream is = new FileInputStream(inputUrl); + ZipSecureFile.setMinInflateRatio(-1.0d); + XWPFDocument doc = new XWPFDocument(is); + //排序后的图表 + List charts = ExcelKit.getSortListForXWPFChart(doc.getCharts()); + + //数据在图表36,教职工总量 + int firstChartNumber = 36; + if(cityName.contains("西双版纳州")){ + System.out.println("he"); + } + charts.get(firstChartNumber - 1).getWorkbook(); + } + } + } + } +}