main
黄海 8 months ago
parent b6f89001d6
commit af8bccbf5e

@ -9,6 +9,7 @@ import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import java.io.*;
import java.util.Arrays;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
@ -59,7 +60,7 @@ public class ReadWordTOC {
Document document = reader.read(new File(xmlPath));
Element root = document.getRootElement();// 获取根元素
List<Element> children = root.element("body").elements("p");//工作区
boolean out = false;
for (Element child : children) {
if (child.getName().equals("p")) {
// if (child.element("pPr") == null) continue;
@ -84,17 +85,46 @@ public class ReadWordTOC {
// Append to content the text of each <t> child of pChild that passes the font, size and bold filters below.
for (Element t : pChild.elements("t")) {
// The font and size of this element are both governed by the sibling <rPr> above it (font: <rFonts> attribute "ascii").
// NOTE(review): the lookup chain has no null checks; presumably a missing <rPr>, <rFonts> or "ascii" attribute raises a NullPointerException here — confirm.
String font = pChild.element("rPr").element("rFonts").attribute("ascii").getValue();
// Text in either of these two fonts is skipped.
if (font.equals("方正仿宋简体")) continue;
if (font.equals("华文楷体")) continue;
// The size is read from the "val" attribute of <sz>; only the value 32 is kept.
int fontSize = Integer.parseInt(pChild.element("rPr").element("sz").attribute("val").getValue());
if (fontSize != 32) continue;
// NOTE(review): as shown, the same font was already skipped by the first check above, so this branch
// looks unreachable; the earlier check may be a leftover from a previous revision — confirm.
if (font.equals("方正仿宋简体")) {
// Bold handling: keep text whose <rPr> has a <b> tag; everything else is skipped via continue.
if (pChild.element("rPr").element("b") == null) continue;
}
content = content + t.getText();
}
}
}
}
if (!StrKit.isBlank(content))
System.out.println(content);
// Decide whether the collected content is printed. Relies on the boolean flag `out`, declared
// outside this statement, to remember across iterations whether printing is currently switched on.
if (!StrKit.isBlank(content)) {
// Original note: content of the form "图" + number is not output. The check below skips any content containing "(图".
// NOTE(review): both operands of && are the same literal as shown; one of them may have been meant to be
// the full-width parenthesis variant — confirm against the original file.
if (!content.contains("(图") && !content.contains("(图")) {
// Output everything (earlier variant, kept commented out below)
// if(content.startsWith("")) System.out.print("\t");
// // If content starts with digits followed by '.', print two extra tabs
// if (content.matches("^[0-9]+\\..*")) System.out.print("\t\t");
// System.out.println(content);
// Only output sections four ("四") and five ("五")
String[] printDx = {"一", "二", "三", "六", "七", "八", "九", "十"};
// Convert to List<String>
List<String> printDxList = Arrays.asList(printDx);
// Content starting with "四" or "五" switches printing on ...
if (content.startsWith("四") || content.startsWith("五")) {
out = true;
}
// ... and content starting with any other listed numeral switches it off again.
for (String s : printDxList) {
if (content.startsWith(s)) out = false;
}
// While switched on, every content that reaches this point is printed.
if (out) {
// NOTE(review): the prefix literal is empty as shown, and startsWith("") is true for every string,
// so the tab is always printed; a character may have been lost from the literal — confirm.
if(content.startsWith("")) System.out.print("\t");
System.out.println(content);
}
}
}
}
}
}

Loading…
Cancel
Save