|
|
|
@ -0,0 +1,182 @@
|
|
|
|
|
package com.dsideal.base.Tools.FillData.Area;
|
|
|
|
|
|
|
|
|
|
import cn.hutool.core.io.FileUtil;
|
|
|
|
|
import com.dsideal.base.DataEase.Model.DataEaseModel;
|
|
|
|
|
import com.dsideal.base.Tools.FillData.DataEaseKit.DsKit;
|
|
|
|
|
import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil;
|
|
|
|
|
import com.jfinal.kit.StrKit;
|
|
|
|
|
import org.apache.commons.io.FileUtils;
|
|
|
|
|
import org.apache.poi.ss.usermodel.Row;
|
|
|
|
|
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
|
|
|
|
|
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
|
|
|
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
|
|
|
|
import org.dom4j.Document;
|
|
|
|
|
import org.dom4j.DocumentException;
|
|
|
|
|
import org.dom4j.Element;
|
|
|
|
|
import org.dom4j.io.SAXReader;
|
|
|
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
|
import java.io.FileInputStream;
|
|
|
|
|
import java.io.FileOutputStream;
|
|
|
|
|
import java.io.IOException;
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.Arrays;
|
|
|
|
|
import java.util.Collections;
|
|
|
|
|
import java.util.List;
|
|
|
|
|
import java.util.zip.ZipEntry;
|
|
|
|
|
import java.util.zip.ZipInputStream;
|
|
|
|
|
|
|
|
|
|
import static com.dsideal.base.Tools.FillData.DataEaseKit.DsKit.DocxUnzipDirectory;
|
|
|
|
|
|
|
|
|
|
public class A13 {
|
|
|
|
|
//只输出四和五
|
|
|
|
|
static String[] printDx = {"一", "二", "三", "四", "五", "六", "七", "八", "九", "十"};
|
|
|
|
|
//转为 List<String>
|
|
|
|
|
static List<String> printDxList = Arrays.asList(printDx);
|
|
|
|
|
//哪些是处理不了的,就不处理了~
|
|
|
|
|
static String[] excludeCityList = {"~$", "磨憨-磨丁", "经开区", "阳宗海"};
|
|
|
|
|
//示例Excel
|
|
|
|
|
static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\区\\【13】教育特征决策建议\\【云南省】教育特征-决策建议.xlsx";
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* 提取Word文档中的目录信息
|
|
|
|
|
*
|
|
|
|
|
* @param wordPath
|
|
|
|
|
* @throws DocumentException
|
|
|
|
|
* @throws IOException
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static DataEaseModel dm = new DataEaseModel();
|
|
|
|
|
|
|
|
|
|
public static List<String> get4() throws DocumentException {
|
|
|
|
|
List<String> list = new ArrayList<>();
|
|
|
|
|
//读入XML
|
|
|
|
|
String xmlPath = DocxUnzipDirectory + "word\\document.xml";
|
|
|
|
|
SAXReader reader = new SAXReader(); // 创建 SAXReader 对象,读取 XML 文件
|
|
|
|
|
Document document = reader.read(new File(xmlPath));
|
|
|
|
|
Element root = document.getRootElement();// 获取根元素
|
|
|
|
|
List<Element> children = root.element("body").elements("p");//工作区
|
|
|
|
|
boolean out = false;
|
|
|
|
|
int parent = 0;
|
|
|
|
|
for (Element child : children) {
|
|
|
|
|
if (child.getName().equals("p")) {
|
|
|
|
|
List<Element> pChildren = child.elements();
|
|
|
|
|
String content = "";
|
|
|
|
|
for (Element pChild : pChildren) {
|
|
|
|
|
if (!pChild.getName().equals("pPr")) {
|
|
|
|
|
if (pChild.getName().equals("r")) {
|
|
|
|
|
for (Element t : pChild.elements("t")) {
|
|
|
|
|
content = content + t.getText();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!StrKit.isBlank(content)) {
|
|
|
|
|
//如果content是 "图"+数字形式的,不输出
|
|
|
|
|
if (!content.contains("(图") && !content.contains("(图")) {
|
|
|
|
|
//如果文字不是以上面printDx中的某一个开头,而且不是以数字+.开头,不输出
|
|
|
|
|
if (content.startsWith("(") && printDxList.contains(String.valueOf(content.charAt(1)))) {
|
|
|
|
|
out = true;
|
|
|
|
|
}
|
|
|
|
|
//太长的不要
|
|
|
|
|
if (content.length() > 40) continue;
|
|
|
|
|
if (printDxList.contains(content.substring(0, 1))) {
|
|
|
|
|
if (content.charAt(0) == '四' && content.charAt(1) == '、') {
|
|
|
|
|
parent = 4;
|
|
|
|
|
}
|
|
|
|
|
if (content.charAt(0) == '五' && content.charAt(1) == '、') {
|
|
|
|
|
parent = 5;
|
|
|
|
|
}
|
|
|
|
|
out = true;
|
|
|
|
|
}
|
|
|
|
|
if (out && parent > 0) {
|
|
|
|
|
if (!content.startsWith("(")) continue;
|
|
|
|
|
if (parent == 4) {
|
|
|
|
|
list.add(content.split("。")[0]);
|
|
|
|
|
}
|
|
|
|
|
// if (parent == 5) {
|
|
|
|
|
// System.out.println("==================五===============");
|
|
|
|
|
// System.out.println(content.split("。")[0]);
|
|
|
|
|
// }
|
|
|
|
|
out = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return list;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
public static void main(String[] args) throws IOException, DocumentException {
|
|
|
|
|
//初始化数据库连接
|
|
|
|
|
LocalMysqlConnectUtil.Init();
|
|
|
|
|
//结果Excel
|
|
|
|
|
XSSFWorkbook outWorkbook = new XSSFWorkbook();
|
|
|
|
|
//结果Sheet
|
|
|
|
|
XSSFSheet outSheet = DsKit.createSheet(outWorkbook);
|
|
|
|
|
//样式
|
|
|
|
|
XSSFCellStyle headerStyle = DsKit.getHeaderStyle(outWorkbook);
|
|
|
|
|
XSSFCellStyle dataStyle = DsKit.getDataStyle(outWorkbook);
|
|
|
|
|
//如果样例文件是xls格式,则转化为xlsx格式
|
|
|
|
|
sampleExcelPath = DsKit.convertXlsToXlsx(sampleExcelPath);
|
|
|
|
|
//拷贝文件头
|
|
|
|
|
DsKit.copyHead(sampleExcelPath, outSheet, headerStyle);
|
|
|
|
|
|
|
|
|
|
//目标Excel,就是把文件名解析出来后,后面添加上【成果】,需要动态计算获取,不能写死
|
|
|
|
|
String excelPath = sampleExcelPath.replace(".xlsx", "【成果】.xlsx");
|
|
|
|
|
DsKit.delExcel(excelPath);
|
|
|
|
|
|
|
|
|
|
String parentPath = "D:\\dsidealDoc\\全省及州市县区人口与教育报告集20241023\\133个县区报告2022\\县区研究报告";
|
|
|
|
|
List<File> files = FileUtil.loopFiles(parentPath, file -> true);
|
|
|
|
|
int rowIndex = 0;
|
|
|
|
|
//处理这个目录
|
|
|
|
|
if (files != null) {
|
|
|
|
|
for (File file : files) {
|
|
|
|
|
//判断file是不是目录,是目录的需要跳过
|
|
|
|
|
if (file.isDirectory()) continue;
|
|
|
|
|
String fileName = file.getName();
|
|
|
|
|
//判断是否为docx文件
|
|
|
|
|
if (fileName.endsWith(".docx") && !fileName.startsWith("~")) {
|
|
|
|
|
boolean flag = false;
|
|
|
|
|
for (String s : excludeCityList) {
|
|
|
|
|
if (file.getName().contains(s)) {
|
|
|
|
|
flag = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (flag) continue;
|
|
|
|
|
//县区名称
|
|
|
|
|
String areaName = dm.getAreaName(file.getName());
|
|
|
|
|
//市州名称
|
|
|
|
|
String cityName = dm.getCityNameByAreaName(areaName);
|
|
|
|
|
|
|
|
|
|
if (StrKit.isBlank(cityName) || StrKit.isBlank(areaName)) {
|
|
|
|
|
System.out.println("发现异常数据,请人工处理:" + file.getName());
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
//县区名称
|
|
|
|
|
System.out.println("正在进行" + cityName + "-" + areaName + "的数据填充~");
|
|
|
|
|
DsKit.unCompress(file.getAbsolutePath());
|
|
|
|
|
|
|
|
|
|
List<String> list4 = get4();
|
|
|
|
|
//如果list4的元素个数不足7个,需要补全到7个,空的用空字符串
|
|
|
|
|
if (list4.size() < 7) {
|
|
|
|
|
for (int i = list4.size(); i < 7; i++) {
|
|
|
|
|
list4.add("");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Row outRow = outSheet.createRow(++rowIndex);
|
|
|
|
|
DsKit.putData(outRow, Arrays.asList(areaName, "教育特征","全县人口呈平稳增长趋势",
|
|
|
|
|
list4.getFirst(), list4.get(1), list4.get(2), list4.get(3),
|
|
|
|
|
list4.get(4), list4.get(5), list4.get(6),
|
|
|
|
|
cityName), dataStyle);
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
//保存文件
|
|
|
|
|
DsKit.saveExcel(excelPath, outWorkbook);
|
|
|
|
|
System.out.println("县区所有文件处理完成!");
|
|
|
|
|
}
|
|
|
|
|
}
|