From d1b14d3759d51fc04ecce4743790837eae32228c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E6=B5=B7?= <10402852@qq.com> Date: Fri, 22 Nov 2024 11:54:07 +0800 Subject: [PATCH] 'commit' --- .../com/dsideal/base/Test/ReadWordTOC.java | 52 +++++-------------- 1 file changed, 14 insertions(+), 38 deletions(-) diff --git a/src/main/java/com/dsideal/base/Test/ReadWordTOC.java b/src/main/java/com/dsideal/base/Test/ReadWordTOC.java index 1073b468..37228d4e 100644 --- a/src/main/java/com/dsideal/base/Test/ReadWordTOC.java +++ b/src/main/java/com/dsideal/base/Test/ReadWordTOC.java @@ -18,7 +18,7 @@ import static com.dsideal.base.Tools.FillData.DataEaseKit.DsKit.DocxUnzipDirecto public class ReadWordTOC { public static void main(String[] args) throws IOException, DocumentException { - String wordPath = "c:/双柏县人口变化及其对教育的影响.docx"; + String wordPath = "c:/2.docx"; //解压缩 if (new File(DocxUnzipDirectory).exists()) { FileUtils.deleteDirectory(new File(DocxUnzipDirectory)); @@ -53,8 +53,6 @@ public class ReadWordTOC { //读入XML String xmlPath = DocxUnzipDirectory + "word\\document.xml"; - ///w:document/w:body/w:p/w:r/w:t - //System.out.println(FileUtil.readUtf8String(xmlPath).contains("加强")); SAXReader reader = new SAXReader(); // 创建 SAXReader 对象,读取 XML 文件 Document document = reader.read(new File(xmlPath)); @@ -63,36 +61,12 @@ public class ReadWordTOC { boolean out = false; for (Element child : children) { if (child.getName().equals("p")) { -// if (child.element("pPr") == null) continue; -// if (child.element("pPr").element("rPr") == null) continue; -// if (child.element("pPr").element("rPr").element("rFonts") == null) continue; -// -// String font = child.element("pPr").element("rPr").element("rFonts").attribute("ascii").getValue(); -// //子元素需要继续处理 -// if (font.equals("方正仿宋简体")) continue; -// if (font.equals("华文楷体")) continue; -// -// if (child.element("pPr").element("rPr").element("sz") != null) { -// int fontSize = Integer.parseInt(child.element("pPr").element("rPr").element("sz").attribute("val").getValue()); -// if (fontSize != 32) continue; -// } - List pChildren = child.elements(); String content = ""; for (Element pChild : pChildren) { if (!pChild.getName().equals("pPr")) { if (pChild.getName().equals("r")) { for (Element t : pChild.elements("t")) { - //此元素的字体与字号,同受它同级上面属性ascii控制 - String font = pChild.element("rPr").element("rFonts").attribute("ascii").getValue(); - if (font.equals("华文楷体")) continue; - int fontSize = Integer.parseInt(pChild.element("rPr").element("sz").attribute("val").getValue()); - if (fontSize != 32) continue; - if (font.equals("方正仿宋简体")) { - //加粗,有/b标签的保留,其它的不保留continue掉 - if (pChild.element("rPr").element("b") == null) continue; - } - content = content + t.getText(); } } @@ -102,26 +76,28 @@ public class ReadWordTOC { if (!StrKit.isBlank(content)) { //如果content是 "图"+数字形式的,不输出 if (!content.contains("(图") && !content.contains("(图")) { - //输出全部内容 -// if(content.startsWith("(")) System.out.print("\t"); -// //如果content是以 数字+.开头的,那么多输出两个tab -// if (content.matches("^[0-9]+\\..*")) System.out.print("\t\t"); -// System.out.println(content); //只输出四和五 - String[] printDx = {"一", "二", "三", "六", "七", "八", "九", "十"}; + String[] printDx = {"一", "二", "三", "四", "五", "六", "七", "八", "九", "十"}; //转为 List List printDxList = Arrays.asList(printDx); - - if (content.startsWith("四") || content.startsWith("五")) { + //如果文字不是以上面printDx中的某一个开头,而且不是以数字+.开头,不输出 + if (content.startsWith("(")) { out = true; } - for (String s : printDxList) { - if (content.startsWith(s)) out = false; + //如果content第一位是数字,第二位是小数点 + if (content.length() > 1 && content.charAt(1) == '.' && (content.charAt(0) >= '0' && content.charAt(0) <= '9')) { + out = true; + } + if (printDxList.contains(content.substring(0, 1))) { + out = true; } + if (StrKit.isBlank(content.trim())) continue; if (out) { - if(content.startsWith("(")) System.out.print("\t"); + if (content.startsWith("(")) System.out.print("\t"); + if (content.matches("^[0-9]+\\..*")) System.out.print("\t\t"); System.out.println(content); + out = false; } } }