package com.dsideal.base.Test; import cn.hutool.core.io.FileUtil; import org.apache.commons.io.FileUtils; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.SAXReader; import java.io.*; import java.util.List; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import static com.dsideal.base.Tools.FillData.DataEaseKit.DsKit.DocxUnzipDirectory; public class ReadWordTOC { public static void main(String[] args) throws IOException, DocumentException { String wordPath = "c:/双柏县人口变化及其对教育的影响.docx"; //解压缩 if (new File(DocxUnzipDirectory).exists()) { FileUtils.deleteDirectory(new File(DocxUnzipDirectory)); } File file = new File(wordPath);//取得word文件 FileInputStream inputStream = new FileInputStream(file); ZipInputStream zipInputStream = new ZipInputStream(inputStream); ZipEntry entry; byte[] ch = new byte[256]; while ((entry = zipInputStream.getNextEntry()) != null) { File zFile = new File(DocxUnzipDirectory + entry.getName()); if (entry.isDirectory()) { if (!zFile.exists()) { zFile.mkdirs(); } zipInputStream.closeEntry(); } else { File fpath = new File(zFile.getParent()); if (!fpath.exists()) { fpath.mkdirs(); } FileOutputStream outputStream = new FileOutputStream(zFile); int i; while ((i = zipInputStream.read(ch)) != -1) { outputStream.write(ch, 0, i); } zipInputStream.closeEntry(); outputStream.close(); } } inputStream.close(); //读入XML String xmlPath = DocxUnzipDirectory + "word\\document.xml"; ///w:document/w:body/w:p/w:r/w:t //System.out.println(FileUtil.readUtf8String(xmlPath).contains("加强")); SAXReader reader = new SAXReader(); // 创建 SAXReader 对象,读取 XML 文件 Document document = reader.read(new File(xmlPath)); Element root = document.getRootElement();// 获取根元素 List children = root.element("body").elements("p");//工作区 for (Element child : children) { if (child.getName().equals("p")) { List pChildren = child.elements(); boolean isBookmark = false; String content = ""; for (Element pChild : pChildren) { if (pChild.getName().equals("bookmarkStart")) { isBookmark = true; } if (isBookmark && !pChild.getName().equals("bookmarkStart") && !pChild.getName().equals("bookmarkEnd")) { if (pChild.getName().equals("r")) { for (Element t : pChild.elements("t")) { content = content + t.getText(); } } } if (pChild.getName().equals("bookmarkEnd")) { isBookmark = false; System.out.println(content); } } } } } }