'commit'

8 months ago · 4049d5f798
parent 736c1dfd95
commit 4049d5f798
3 changed files with 40 additions and 17 deletions
--- a/src/main/java/com/dsideal/base/Test/ReadWordTOC.java
+++ b/src/main/java/com/dsideal/base/Test/ReadWordTOC.java
@ -13,12 +13,22 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipInputStream;
-
+import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil;
 import static com.dsideal.base.Tools.FillData.DataEaseKit.DsKit.DocxUnzipDirectory;

 public class ReadWordTOC {
-    public static void main(String[] args) throws IOException, DocumentException {
-        String wordPath = "c:/4.docx";
+    //Chinese numerals one to ten; the leading characters of a content line are tested against this set
+    static String[] printDx = {"一", "二", "三", "四", "五", "六", "七", "八", "九", "十"};
+    //The same numerals exposed as a List<String> so that contains() can be used for the lookup
+    static List<String> printDxList = Arrays.asList(printDx);
+    /**
+     * Extracts the table-of-contents information from a Word document.
+     *
+     * @param wordPath path of the Word (.docx) file to process
+     * @throws DocumentException
+     * @throws IOException
+     */
+    public static void getToc(String wordPath) throws DocumentException, IOException {
        //解压缩
        if (new File(DocxUnzipDirectory).exists()) {
            FileUtils.deleteDirectory(new File(DocxUnzipDirectory));
@ -77,20 +87,14 @@ public class ReadWordTOC {
                if (!StrKit.isBlank(content)) {
                    //如果content是 "图"+数字形式的，不输出
                    if (!content.contains("(图") && !content.contains("（图")) {
-                        //只输出四和五
-                        String[] printDx = {"一", "二", "三", "四", "五", "六", "七", "八", "九", "十"};
-                        //转为 List<String>
-                        List<String> printDxList = Arrays.asList(printDx);
                        //如果文字不是以上面printDx中的某一个开头，而且不是以数字+.开头，不输出
-                        if (content.startsWith("（")) {
-                            out = true;
-                        }
-                        //如果content第一位是数字，第二位是小数点
-                        if (content.length() > 1 && content.charAt(1) == '.' && (content.charAt(0) >= '0' && content.charAt(0) <= '9')) {
+                        if (content.startsWith("（") && printDxList.contains(String.valueOf(content.charAt(1)))) {
                            out = true;
                        }
+                        //太长的不要
+                        if (content.length() > 40) continue;
                        if (printDxList.contains(content.substring(0, 1))) {
-                            if ((content.substring(0, 1).equals("四") || content.substring(0, 1).equals("五"))) {
+                            if ((content.charAt(0) == '四' || content.charAt(0) == '五') && content.charAt(1) == '、') {
                                parent = true;
                            } else {
                                parent = false;
@ -102,7 +106,7 @@ public class ReadWordTOC {
                        if (out && parent) {
                            if (content.startsWith("（")) System.out.print("\t");
                            if (content.matches("^[0-9]+\\..*")) System.out.print("\t\t");
-                            System.out.println(content);
+                            System.out.println(content.split("。")[0]);
                            out = false;
                        }
                    }
@ -110,4 +114,25 @@ public class ReadWordTOC {
            }
        }
    }
+
+    public static void main(String[] args) throws IOException, DocumentException {
+                //Batch entry point: start by initializing the database connection
+        LocalMysqlConnectUtil.Init();
+
+        //Hard-coded root folder that is searched for the report documents
+        String parentPath = "D:\\dsidealDoc\\全省及州市县区人口与教育报告集20241023\\133个县区报告2022\\县区研究报告";
+        List<File> files = FileUtil.loopFiles(parentPath, file -> true);
+        //Walk every entry returned for that folder and run getToc on the matching ones
+        if (files != null) {
+            for (File file : files) {
+                //Directories are skipped; only regular entries are examined
+                if (file.isDirectory()) continue;
+                String fileName = file.getName();
+                //Only .docx files are processed; names starting with "~" are excluded (presumably Word temp/lock files - confirm)
+                if (fileName.endsWith(".docx") && !fileName.startsWith("~")) {
+                    getToc(file.getAbsolutePath());
+                }
+            }
+        }
+    }
 }
--- a/src/main/java/com/dsideal/base/Tools/FillData/Area/A4.java
+++ b/src/main/java/com/dsideal/base/Tools/FillData/Area/A4.java
@ -122,7 +122,7 @@ public class A4 {
                            v = Integer.parseInt(stringList.get(DsKit.transLetter2Num("G")).split("\\.")[0]);
                        }
                        Row outRow = outSheet.createRow(++rowIndex);
-                        DsKit.putData(outRow, Arrays.asList(String.valueOf(year), "", "总招生数", stageName, "", String.valueOf(v), areaName, cityName), dataStyle);
+                        DsKit.putData(outRow, Arrays.asList(String.valueOf(year), "", "总在校生", stageName, "", String.valueOf(v), areaName, cityName), dataStyle);
                    }

                    //2022入园基数
--- a/src/main/java/com/dsideal/base/Tools/FillData/City/C1.java
+++ b/src/main/java/com/dsideal/base/Tools/FillData/City/C1.java
@ -27,8 +27,6 @@ public class C1 {
    public static void main(String[] args) throws IOException, InvalidFormatException, InterruptedException, DocumentException {
        //初始化数据库连接
        LocalMysqlConnectUtil.Init();
-        //实例化
-        

        //目标Excel,就是把文件名解析出来后，后面添加上【成果】,需要动态计算获取，不能写死
        String excelPath = sampleExcelPath.replace(".xlsx", "【成果】.xlsx");