main
黄海 8 months ago
parent 89b5741805
commit 4eb59e736d

@ -883,7 +883,10 @@ public class DataEaseModel {
/**
 * Resolves the name of the parent area (the city) for a given area name.
 *
 * <p>Looks up the row in {@code t_dm_area} whose {@code area_name} equals
 * {@code areaName}, reads its {@code parent_id}, and returns the
 * {@code area_name} of the record that {@link #getAreaById} yields for
 * that id.
 *
 * @param areaName the area name to look up
 * @return the parent area's {@code area_name}, or {@code null} when no row
 *         matches {@code areaName} or the matching row has no {@code parent_id}
 */
public String getCityNameByAreaName(String areaName) {
    String sql = "select id,parent_id from t_dm_area where area_name=?";

    // Query once and guard the result: dereferencing Db.findFirst(...) directly
    // would throw a NullPointerException when no row matches.
    Record record = Db.findFirst(sql, areaName);
    if (record == null) {
        return null;
    }

    String parentId = record.getStr("parent_id");
    if (parentId == null) {
        return null;
    }

    // NOTE(review): getAreaById is defined outside this view; if it can return
    // null for an unknown id, this call still needs a guard - confirm.
    return getAreaById(parentId).getStr("area_name");
}
}

@ -1,6 +1,7 @@
package com.dsideal.base.Test;
import cn.hutool.core.io.FileUtil;
import com.dsideal.base.DataEase.Model.DataEaseModel;
import com.jfinal.kit.StrKit;
import org.apache.commons.io.FileUtils;
import org.dom4j.Document;
@ -13,7 +14,9 @@ import java.util.Arrays;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil;
import static com.dsideal.base.Tools.FillData.DataEaseKit.DsKit.DocxUnzipDirectory;
public class ReadWordTOC {
@ -21,6 +24,9 @@ public class ReadWordTOC {
static String[] printDx = {"一", "二", "三", "四", "五", "六", "七", "八", "九", "十"};
//转为 List<String>
static List<String> printDxList = Arrays.asList(printDx);
//哪些是处理不了的,就不处理了~
static String[] excludeCityList = {"~$", "磨憨-磨丁", "经开区", "阳宗海"};
/**
* Word
*
@ -69,7 +75,7 @@ public class ReadWordTOC {
Element root = document.getRootElement();// 获取根元素
List<Element> children = root.element("body").elements("p");//工作区
boolean out = false;
boolean parent = false;
int parent = 0;
for (Element child : children) {
if (child.getName().equals("p")) {
List<Element> pChildren = child.elements();
@ -94,19 +100,25 @@ public class ReadWordTOC {
//太长的不要
if (content.length() > 40) continue;
if (printDxList.contains(content.substring(0, 1))) {
if ((content.charAt(0) == '四' || content.charAt(0) == '五') && content.charAt(1) == '、') {
parent = true;
} else {
parent = false;
if (content.charAt(0) == '四' && content.charAt(1) == '、') {
parent = 4;
}
if (content.charAt(0) == '五' && content.charAt(1) == '、') {
parent = 5;
}
out = true;
}
if (StrKit.isBlank(content.trim())) continue;
if (out && parent) {
if (content.startsWith("")) System.out.print("\t");
if (content.matches("^[0-9]+\\..*")) System.out.print("\t\t");
System.out.println(content.split("。")[0]);
if (out && parent > 0) {
if (!content.startsWith("")) continue;
if (parent == 4) {
System.out.println("==================四===============");
System.out.println(content.split("。")[0]);
}
if (parent == 5) {
System.out.println("==================五===============");
System.out.println(content.split("。")[0]);
}
out = false;
}
}
@ -115,8 +127,10 @@ public class ReadWordTOC {
}
}
static DataEaseModel dm = new DataEaseModel();
public static void main(String[] args) throws IOException, DocumentException {
//初始化数据库连接
//初始化数据库连接
LocalMysqlConnectUtil.Init();
@ -130,6 +144,25 @@ public class ReadWordTOC {
String fileName = file.getName();
//判断是否为docx文件
if (fileName.endsWith(".docx") && !fileName.startsWith("~")) {
boolean flag = false;
for (String s : excludeCityList) {
if (file.getName().contains(s)) {
flag = true;
break;
}
}
if (flag) continue;
//县区名称
String areaName = dm.getAreaName(file.getName());
//市州名称
String cityName = dm.getCityNameByAreaName(areaName);
if (StrKit.isBlank(cityName) || StrKit.isBlank(areaName)) {
System.out.println("发现异常数据,请人工处理:" + file.getName());
continue;
}
//县区名称
System.out.println("正在进行" + cityName + "-" + areaName + "的数据填充~");
getToc(file.getAbsolutePath());
}
}

Loading…
Cancel
Save