From af8bccbf5eb46b2e712e355796525b5a4404da88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E6=B5=B7?= <10402852@qq.com> Date: Fri, 22 Nov 2024 11:30:07 +0800 Subject: [PATCH] 'commit' --- .../com/dsideal/base/Test/ReadWordTOC.java | 38 +++++++++++++++++-- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/dsideal/base/Test/ReadWordTOC.java b/src/main/java/com/dsideal/base/Test/ReadWordTOC.java index ab33880f..1073b468 100644 --- a/src/main/java/com/dsideal/base/Test/ReadWordTOC.java +++ b/src/main/java/com/dsideal/base/Test/ReadWordTOC.java @@ -9,6 +9,7 @@ import org.dom4j.Element; import org.dom4j.io.SAXReader; import java.io.*; +import java.util.Arrays; import java.util.List; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; @@ -59,7 +60,7 @@ public class ReadWordTOC { Document document = reader.read(new File(xmlPath)); Element root = document.getRootElement();// 获取根元素 List children = root.element("body").elements("p");//工作区 - + boolean out = false; for (Element child : children) { if (child.getName().equals("p")) { // if (child.element("pPr") == null) continue; @@ -84,17 +85,46 @@ public class ReadWordTOC { for (Element t : pChild.elements("t")) { //此元素的字体与字号,同受它同级上面属性ascii控制 String font = pChild.element("rPr").element("rFonts").attribute("ascii").getValue(); - if (font.equals("方正仿宋简体")) continue; if (font.equals("华文楷体")) continue; int fontSize = Integer.parseInt(pChild.element("rPr").element("sz").attribute("val").getValue()); if (fontSize != 32) continue; + if (font.equals("方正仿宋简体")) { + //加粗,有/b标签的保留,其它的不保留continue掉 + if (pChild.element("rPr").element("b") == null) continue; + } + content = content + t.getText(); } } } } - if (!StrKit.isBlank(content)) - System.out.println(content); + + if (!StrKit.isBlank(content)) { + //如果content是 "图"+数字形式的,不输出 + if (!content.contains("(图") && !content.contains("(图")) { + //输出全部内容 +// if(content.startsWith("(")) System.out.print("\t"); +// //如果content是以 数字+.开头的,那么多输出两个tab +// if (content.matches("^[0-9]+\\..*")) System.out.print("\t\t"); +// System.out.println(content); + //只输出四和五 + String[] printDx = {"一", "二", "三", "六", "七", "八", "九", "十"}; + //转为 List + List printDxList = Arrays.asList(printDx); + + if (content.startsWith("四") || content.startsWith("五")) { + out = true; + } + for (String s : printDxList) { + if (content.startsWith(s)) out = false; + } + + if (out) { + if(content.startsWith("(")) System.out.print("\t"); + System.out.println(content); + } + } + } } } }