package com.dsideal.base.Tools.FillData.Area;

import cn.hutool.core.io.FileUtil;
import com.dsideal.base.DataEase.Model.DataEaseModel;
import com.dsideal.base.Tools.FillData.DataEaseKit.DsKit;
import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil;
import com.jfinal.kit.StrKit;
import org.apache.commons.io.FileUtils;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import static com.dsideal.base.Tools.FillData.DataEaseKit.DsKit.DocxUnzipDirectory;

/**
 * One-off fill tool: walks a directory of county-level Word reports, pulls the
 * parenthesised sub-headings found under section "四、" of each report, and writes
 * one row per county into a result workbook derived from a sample Excel file.
 */
public class A13 {
    // Chinese numerals that may prefix a heading, either directly ("四、...") or
    // right after an opening parenthesis ("(一)...").
    static String[] printDx = {"一", "二", "三", "四", "五", "六", "七", "八", "九", "十"};
    // List view of printDx, used for contains() look-ups.
    static List<String> printDxList = Arrays.asList(printDx);
    // File-name fragments of reports that cannot be processed and are skipped.
    static String[] excludeCityList = {"~$", "磨憨-磨丁", "经开区", "阳宗海"};
    // Sample workbook whose header is copied into the result workbook.
    static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\区\\【13】教育特征决策建议\\【云南省】教育特征-决策建议.xlsx";

    static DataEaseModel dm = new DataEaseModel();

    // Number of section-4 columns written per row; shorter lists are padded with "".
    private static final int SECTION4_COLUMNS = 7;
    // Paragraphs longer than this are treated as body text, not headings.
    private static final int MAX_HEADING_LENGTH = 40;
    // Full-width opening parenthesis, written as an escape so it survives
    // width-normalising tools.
    private static final char FULL_WIDTH_OPEN_PAREN = '\uFF08';

    /**
     * Extracts the parenthesised sub-headings of section "四、" from the Word
     * document whose {@code word\document.xml} currently sits under
     * {@code DocxUnzipDirectory}.
     *
     * <p>For every collected paragraph only the text before the first "。" is kept.
     * Collection stops once a "五、" heading has been seen.
     *
     * @return the collected heading texts in document order; never {@code null}
     * @throws DocumentException if the XML file cannot be read or parsed
     */
    public static List<String> get4() throws DocumentException {
        List<String> list = new ArrayList<>();
        String xmlPath = DocxUnzipDirectory + "word\\document.xml";
        SAXReader reader = new SAXReader();
        Document document = reader.read(new File(xmlPath));
        Element root = document.getRootElement();
        List<Element> children = root.element("body").elements("p");

        // out: a heading was just seen, so the next parenthesised paragraph is a candidate.
        boolean out = false;
        // parent: number of the enclosing top-level section (4 or 5); 0 until "四、" is seen.
        int parent = 0;
        for (Element child : children) {
            String content = paragraphText(child);
            if (StrKit.isBlank(content)) {
                continue;
            }
            // Skip paragraphs that reference a figure, e.g. "(图3)".
            // NOTE(review): the previous code tested the same half-width literal twice;
            // the second test was presumably the full-width variant - confirm.
            if (content.contains("(图") || content.contains(FULL_WIDTH_OPEN_PAREN + "图")) {
                continue;
            }
            boolean parenthesised = startsWithOpenParen(content);
            // "(一)..." style sub-heading. The length guard prevents charAt(1) from
            // throwing on a paragraph that consists of the parenthesis alone.
            if (parenthesised && content.length() > 1
                    && printDxList.contains(String.valueOf(content.charAt(1)))) {
                out = true;
            }
            // Deliberately after the flag update above: an over-long sub-heading still
            // arms the flag for the following paragraph, exactly as before.
            if (content.length() > MAX_HEADING_LENGTH) {
                continue;
            }
            if (printDxList.contains(content.substring(0, 1))) {
                // startsWith is safe for one-character paragraphs, unlike charAt(1).
                if (content.startsWith("四、")) {
                    parent = 4;
                }
                if (content.startsWith("五、")) {
                    // Section 5 is tracked only so that section-4 collection stops here.
                    parent = 5;
                }
                out = true;
            }
            if (out && parent > 0) {
                if (!parenthesised) {
                    continue;
                }
                if (parent == 4) {
                    list.add(content.split("。")[0]);
                }
                out = false;
            }
        }
        return list;
    }

    /** Concatenates the text of every "t" element inside the "r" children of a paragraph. */
    private static String paragraphText(Element paragraph) {
        StringBuilder text = new StringBuilder();
        for (Element run : paragraph.elements("r")) {
            for (Element t : run.elements("t")) {
                text.append(t.getText());
            }
        }
        return text.toString();
    }

    /** Returns true when the text begins with a half-width or full-width opening parenthesis. */
    private static boolean startsWithOpenParen(String text) {
        if (text.isEmpty()) {
            return false;
        }
        char first = text.charAt(0);
        return first == '(' || first == FULL_WIDTH_OPEN_PAREN;
    }

    /** Returns true when the file name contains any fragment listed in excludeCityList. */
    private static boolean isExcluded(String fileName) {
        for (String fragment : excludeCityList) {
            if (fileName.contains(fragment)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Entry point: processes every .docx report under the hard-coded report directory
     * and saves the result workbook next to the sample workbook.
     *
     * @param args unused
     * @throws IOException       declared for the file helpers called here
     * @throws DocumentException if a report's document.xml cannot be parsed
     */
    public static void main(String[] args) throws IOException, DocumentException {
        // Initialise the database connection.
        LocalMysqlConnectUtil.Init();
        // Result workbook and sheet.
        XSSFWorkbook outWorkbook = new XSSFWorkbook();
        XSSFSheet outSheet = DsKit.createSheet(outWorkbook);
        // Cell styles.
        XSSFCellStyle headerStyle = DsKit.getHeaderStyle(outWorkbook);
        XSSFCellStyle dataStyle = DsKit.getDataStyle(outWorkbook);
        // If the sample file is in xls format, convert it to xlsx first.
        sampleExcelPath = DsKit.convertXlsToXlsx(sampleExcelPath);
        // Copy the header of the sample into the result sheet.
        DsKit.copyHead(sampleExcelPath, outSheet, headerStyle);

        // The target path is derived from the sample path by appending 【成果】.
        String excelPath = sampleExcelPath.replace(".xlsx", "【成果】.xlsx");
        DsKit.delExcel(excelPath);

        String parentPath = "D:\\dsidealDoc\\全省及州市县区人口与教育报告集20241023\\133个县区报告2022\\县区研究报告";
        List<File> files = FileUtil.loopFiles(parentPath, file -> true);
        // Incremented before each data row is created, so data rows start at index 1.
        int rowIndex = 0;
        if (files != null) {
            for (File file : files) {
                // Directories are skipped.
                if (file.isDirectory()) {
                    continue;
                }
                String fileName = file.getName();
                // Only .docx files whose name does not start with "~" are processed.
                if (!fileName.endsWith(".docx") || fileName.startsWith("~")) {
                    continue;
                }
                if (isExcluded(fileName)) {
                    continue;
                }
                // County / district name, resolved from the file name.
                String areaName = dm.getAreaName(fileName);
                // City / prefecture name, resolved from the county name.
                String cityName = dm.getCityNameByAreaName(areaName);
                if (StrKit.isBlank(cityName) || StrKit.isBlank(areaName)) {
                    System.out.println("发现异常数据,请人工处理:" + fileName);
                    continue;
                }
                System.out.println("正在进行" + cityName + "-" + areaName + "的数据填充~");
                // NOTE(review): presumably unpacks the docx into DocxUnzipDirectory,
                // which get4() then reads - confirm against DsKit.
                DsKit.unCompress(file.getAbsolutePath());

                List<String> list4 = get4();
                // Pad with empty strings so the fixed column indices below are always valid.
                while (list4.size() < SECTION4_COLUMNS) {
                    list4.add("");
                }
                Row outRow = outSheet.createRow(++rowIndex);
                DsKit.putData(outRow, Arrays.asList(areaName, "教育特征", "全县人口呈平稳增长趋势",
                        list4.getFirst(), list4.get(1), list4.get(2), list4.get(3),
                        list4.get(4), list4.get(5), list4.get(6),
                        cityName), dataStyle);
            }
        }
        // Save the result workbook.
        DsKit.saveExcel(excelPath, outWorkbook);
        System.out.println("县区所有文件处理完成!");
    }
}