|
|
|
package com.dsideal.base.Test;
|
|
|
|
|
|
|
|
import cn.hutool.core.io.FileUtil;
|
|
|
|
import com.jfinal.kit.StrKit;
|
|
|
|
import org.apache.commons.io.FileUtils;
|
|
|
|
import org.dom4j.Document;
|
|
|
|
import org.dom4j.DocumentException;
|
|
|
|
import org.dom4j.Element;
|
|
|
|
import org.dom4j.io.SAXReader;
|
|
|
|
|
|
|
|
import java.io.*;
|
|
|
|
import java.util.List;
|
|
|
|
import java.util.zip.ZipEntry;
|
|
|
|
import java.util.zip.ZipInputStream;
|
|
|
|
|
|
|
|
import static com.dsideal.base.Tools.FillData.DataEaseKit.DsKit.DocxUnzipDirectory;
|
|
|
|
|
|
|
|
public class ReadWordTOC {
|
|
|
|
public static void main(String[] args) throws IOException, DocumentException {
|
|
|
|
String wordPath = "c:/双柏县人口变化及其对教育的影响.docx";
|
|
|
|
//解压缩
|
|
|
|
if (new File(DocxUnzipDirectory).exists()) {
|
|
|
|
FileUtils.deleteDirectory(new File(DocxUnzipDirectory));
|
|
|
|
}
|
|
|
|
File file = new File(wordPath);//取得word文件
|
|
|
|
FileInputStream inputStream = new FileInputStream(file);
|
|
|
|
ZipInputStream zipInputStream = new ZipInputStream(inputStream);
|
|
|
|
ZipEntry entry;
|
|
|
|
byte[] ch = new byte[256];
|
|
|
|
while ((entry = zipInputStream.getNextEntry()) != null) {
|
|
|
|
File zFile = new File(DocxUnzipDirectory + entry.getName());
|
|
|
|
if (entry.isDirectory()) {
|
|
|
|
if (!zFile.exists()) {
|
|
|
|
zFile.mkdirs();
|
|
|
|
}
|
|
|
|
zipInputStream.closeEntry();
|
|
|
|
} else {
|
|
|
|
File fpath = new File(zFile.getParent());
|
|
|
|
if (!fpath.exists()) {
|
|
|
|
fpath.mkdirs();
|
|
|
|
}
|
|
|
|
FileOutputStream outputStream = new FileOutputStream(zFile);
|
|
|
|
int i;
|
|
|
|
while ((i = zipInputStream.read(ch)) != -1) {
|
|
|
|
outputStream.write(ch, 0, i);
|
|
|
|
}
|
|
|
|
zipInputStream.closeEntry();
|
|
|
|
outputStream.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
inputStream.close();
|
|
|
|
|
|
|
|
//读入XML
|
|
|
|
String xmlPath = DocxUnzipDirectory + "word\\document.xml";
|
|
|
|
///w:document/w:body/w:p/w:r/w:t
|
|
|
|
//System.out.println(FileUtil.readUtf8String(xmlPath).contains("加强"));
|
|
|
|
|
|
|
|
SAXReader reader = new SAXReader(); // 创建 SAXReader 对象,读取 XML 文件
|
|
|
|
Document document = reader.read(new File(xmlPath));
|
|
|
|
Element root = document.getRootElement();// 获取根元素
|
|
|
|
List<Element> children = root.element("body").elements("p");//工作区
|
|
|
|
|
|
|
|
for (Element child : children) {
|
|
|
|
if (child.getName().equals("p")) {
|
|
|
|
// if (child.element("pPr") == null) continue;
|
|
|
|
// if (child.element("pPr").element("rPr") == null) continue;
|
|
|
|
// if (child.element("pPr").element("rPr").element("rFonts") == null) continue;
|
|
|
|
//
|
|
|
|
// String font = child.element("pPr").element("rPr").element("rFonts").attribute("ascii").getValue();
|
|
|
|
// //子元素需要继续处理
|
|
|
|
// if (font.equals("方正仿宋简体")) continue;
|
|
|
|
// if (font.equals("华文楷体")) continue;
|
|
|
|
//
|
|
|
|
// if (child.element("pPr").element("rPr").element("sz") != null) {
|
|
|
|
// int fontSize = Integer.parseInt(child.element("pPr").element("rPr").element("sz").attribute("val").getValue());
|
|
|
|
// if (fontSize != 32) continue;
|
|
|
|
// }
|
|
|
|
|
|
|
|
List<Element> pChildren = child.elements();
|
|
|
|
String content = "";
|
|
|
|
for (Element pChild : pChildren) {
|
|
|
|
if (!pChild.getName().equals("pPr")) {
|
|
|
|
if (pChild.getName().equals("r")) {
|
|
|
|
for (Element t : pChild.elements("t")) {
|
|
|
|
//此元素的字体与字号,同受它同级上面<rPr>的<rFonts>属性ascii控制
|
|
|
|
String font = pChild.element("rPr").element("rFonts").attribute("ascii").getValue();
|
|
|
|
if (font.equals("方正仿宋简体")) continue;
|
|
|
|
if (font.equals("华文楷体")) continue;
|
|
|
|
int fontSize = Integer.parseInt(pChild.element("rPr").element("sz").attribute("val").getValue());
|
|
|
|
if (fontSize != 32) continue;
|
|
|
|
content = content + t.getText();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!StrKit.isBlank(content))
|
|
|
|
System.out.println(content);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|