main
黄海 8 months ago
parent 4eb59e736d
commit 142927c516

@ -0,0 +1,182 @@
package com.dsideal.base.Tools.FillData.Area;
import cn.hutool.core.io.FileUtil;
import com.dsideal.base.DataEase.Model.DataEaseModel;
import com.dsideal.base.Tools.FillData.DataEaseKit.DsKit;
import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil;
import com.jfinal.kit.StrKit;
import org.apache.commons.io.FileUtils;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import static com.dsideal.base.Tools.FillData.DataEaseKit.DsKit.DocxUnzipDirectory;
/**
 * One-off batch tool that walks a directory of county-level {@code .docx} reports, pulls
 * short lines out of section four ("四、") of each report and writes one row per report
 * into a result workbook whose header is copied from a sample Excel file.
 *
 * <p>All paths are hard-coded for the author's workstation. The helpers in {@code DsKit}
 * and {@code DataEaseModel} are defined elsewhere; their exact behaviour is not visible here.
 */
public class A13 {
    // Chinese numerals one to ten; a paragraph starting with one of them is treated as a
    // numbered heading. Only section four ("四、") is collected at present.
    static String[] printDx = {"一", "二", "三", "四", "五", "六", "七", "八", "九", "十"};
    // The same numerals as a List<String> so that contains() can be used.
    static List<String> printDxList = Arrays.asList(printDx);
    // File-name fragments of reports that cannot be processed and are skipped.
    static String[] excludeCityList = {"~$", "磨憨-磨丁", "经开区", "阳宗海"};
    // Sample Excel whose header row is copied into the result workbook.
    static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\区\\【13】教育特征决策建议\\【云南省】教育特征-决策建议.xlsx";
    // Lookup helper used to resolve area and city names from a report file name.
    static DataEaseModel dm = new DataEaseModel();

    // NOTE(review): this prefix is the empty string, so every startsWith(ITEM_PREFIX) test
    // below is always true. Presumably it was meant to be a bullet glyph that got lost in a
    // copy/paste or encoding step - confirm against the Word documents and restore it here.
    private static final String ITEM_PREFIX = "";
    // Paragraphs longer than this many characters are ignored as body text.
    private static final int MAX_LINE_LENGTH = 40;
    // Number of section-four cells written per output row.
    private static final int SECTION4_COLUMNS = 7;

    /**
     * Reads {@code word\document.xml} below {@code DocxUnzipDirectory} and collects lines
     * belonging to section four of the report.
     *
     * <p>For every non-blank paragraph that does not contain a figure reference:
     * <ul>
     *   <li>a paragraph whose second character is a Chinese numeral, or whose first
     *       character is one, arms the "output" flag;</li>
     *   <li>a paragraph starting with "四、" or "五、" switches the current section;</li>
     *   <li>paragraphs longer than {@value #MAX_LINE_LENGTH} characters are skipped;</li>
     *   <li>when the flag is armed and a section is active, the text up to the first "。"
     *       is appended to the result if the current section is four, and the flag is
     *       reset.</li>
     * </ul>
     *
     * @return the collected lines in document order; empty if the document has no body
     * @throws DocumentException if the XML file cannot be parsed
     */
    public static List<String> get4() throws DocumentException {
        List<String> list = new ArrayList<>();
        String xmlPath = DocxUnzipDirectory + "word\\document.xml";
        SAXReader reader = new SAXReader();
        Document document = reader.read(new File(xmlPath));
        Element body = document.getRootElement().element("body");
        if (body == null) {
            // Malformed or unexpected document: nothing to extract.
            return list;
        }

        boolean out = false;
        int parent = 0;
        for (Element paragraph : body.elements("p")) {
            String content = paragraphText(paragraph);
            if (StrKit.isBlank(content)) {
                continue;
            }
            // Skip paragraphs that reference a figure, with either a half-width or a
            // full-width opening parenthesis.
            // NOTE(review): the original tested the same literal twice; the second test is
            // assumed to have been the full-width variant - confirm.
            if (content.contains("(图") || content.contains("\uFF08图")) {
                continue;
            }
            // Length guard: a single-character paragraph has no second character to inspect.
            if (content.length() > 1
                    && content.startsWith(ITEM_PREFIX)
                    && printDxList.contains(String.valueOf(content.charAt(1)))) {
                out = true;
            }
            if (content.length() > MAX_LINE_LENGTH) {
                continue;
            }
            if (printDxList.contains(content.substring(0, 1))) {
                if (content.startsWith("四、")) {
                    parent = 4;
                }
                if (content.startsWith("五、")) {
                    parent = 5;
                }
                out = true;
            }
            if (out && parent > 0) {
                if (!content.startsWith(ITEM_PREFIX)) {
                    continue;
                }
                // Section five is tracked so that its lines are not attributed to section
                // four, but only section four is collected.
                if (parent == 4) {
                    list.add(firstSentence(content));
                }
                out = false;
            }
        }
        return list;
    }

    /** Concatenates the text of every {@code t} element inside the {@code r} children of a paragraph. */
    private static String paragraphText(Element paragraph) {
        StringBuilder text = new StringBuilder();
        for (Element run : paragraph.elements("r")) {
            for (Element t : run.elements("t")) {
                text.append(t.getText());
            }
        }
        return text.toString();
    }

    /**
     * Returns the text before the first "。", or the whole text if there is none.
     * Unlike {@code split("。")[0]} this cannot fail when the text consists only of "。".
     */
    private static String firstSentence(String text) {
        int end = text.indexOf("。");
        return end < 0 ? text : text.substring(0, end);
    }

    /** Tells whether the file name contains one of the fragments in {@code excludeCityList}. */
    private static boolean isExcluded(String fileName) {
        for (String fragment : excludeCityList) {
            if (fileName.contains(fragment)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Entry point: builds the result workbook from all {@code .docx} reports found below the
     * hard-coded report directory and saves it next to the sample Excel file with the suffix
     * "【成果】".
     *
     * @param args unused
     * @throws IOException declared for the file and workbook helpers called here
     * @throws DocumentException if a report's {@code document.xml} cannot be parsed
     */
    public static void main(String[] args) throws IOException, DocumentException {
        // Initialise the database connection.
        LocalMysqlConnectUtil.Init();
        // Result workbook, sheet and cell styles.
        XSSFWorkbook outWorkbook = new XSSFWorkbook();
        XSSFSheet outSheet = DsKit.createSheet(outWorkbook);
        XSSFCellStyle headerStyle = DsKit.getHeaderStyle(outWorkbook);
        XSSFCellStyle dataStyle = DsKit.getDataStyle(outWorkbook);
        // If the sample file is .xls, convert it to .xlsx first.
        sampleExcelPath = DsKit.convertXlsToXlsx(sampleExcelPath);
        // Copy the header from the sample file.
        DsKit.copyHead(sampleExcelPath, outSheet, headerStyle);
        // The target path is derived from the sample path, not hard-coded.
        String excelPath = sampleExcelPath.replace(".xlsx", "【成果】.xlsx");
        DsKit.delExcel(excelPath);

        String parentPath = "D:\\dsidealDoc\\全省及州市县区人口与教育报告集20241023\\133个县区报告2022\\县区研究报告";
        List<File> files = FileUtil.loopFiles(parentPath, file -> true);
        int rowIndex = 0;
        if (files != null) {
            for (File file : files) {
                if (file.isDirectory()) {
                    continue;
                }
                String fileName = file.getName();
                // Only real .docx files; names starting with "~" are skipped.
                if (!fileName.endsWith(".docx") || fileName.startsWith("~")) {
                    continue;
                }
                if (isExcluded(fileName)) {
                    continue;
                }
                // Area (county/district) name and the city it belongs to.
                String areaName = dm.getAreaName(fileName);
                String cityName = dm.getCityNameByAreaName(areaName);
                if (StrKit.isBlank(cityName) || StrKit.isBlank(areaName)) {
                    System.out.println("发现异常数据,请人工处理:" + fileName);
                    continue;
                }
                System.out.println("正在进行" + cityName + "-" + areaName + "的数据填充~");
                // get4() reads from DocxUnzipDirectory, so the report must be unpacked first.
                DsKit.unCompress(file.getAbsolutePath());
                List<String> list4 = get4();
                // Pad with empty strings so that exactly seven cells can be read below.
                while (list4.size() < SECTION4_COLUMNS) {
                    list4.add("");
                }
                Row outRow = outSheet.createRow(++rowIndex);
                DsKit.putData(outRow, Arrays.asList(areaName, "教育特征", "全县人口呈平稳增长趋势",
                        list4.getFirst(), list4.get(1), list4.get(2), list4.get(3),
                        list4.get(4), list4.get(5), list4.get(6),
                        cityName), dataStyle);
            }
        }
        // Save the result workbook.
        DsKit.saveExcel(excelPath, outWorkbook);
        System.out.println("县区所有文件处理完成!");
    }
}
Loading…
Cancel
Save