package com.dsideal.base.Test;

import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.usermodel.XWPFChart;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;

import java.io.*;
import java.util.List;

/**
 * Command-line utility that scans a fixed directory for {@code .docx} files and,
 * for each one, prints the data of every embedded chart (first sheet of the
 * chart's embedded workbook) followed by every paragraph whose text starts
 * with "图" (figure captions).
 *
 * <p>Reference: https://blog.csdn.net/a346736962/article/details/123037797
 */
public class ReadDoc {

    /**
     * Iterates over the files directly inside the hard-coded working directory and
     * dumps chart data and figure captions of each Word document to standard output.
     *
     * @param args unused
     * @throws IOException            if the directory cannot be listed or a document cannot be read
     * @throws InvalidFormatException if a chart's embedded workbook cannot be opened
     */
    public static void main(String[] args) throws IOException, InvalidFormatException {
        String workingPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\16个州市报告2022\\分析报告20240510";

        File directory = new File(workingPath);
        String[] fileList = directory.list();
        // File.list() returns null when the path does not exist or is not a directory.
        if (fileList == null) {
            throw new IOException("Cannot list directory: " + workingPath);
        }

        for (String fileName : fileList) {
            File candidate = new File(workingPath + "/" + fileName);
            // Only regular entries are processed; sub-directories are not descended into.
            if (candidate.isDirectory()) {
                continue;
            }
            // Names starting with "~" are skipped (e.g. Word's "~$" owner/lock files).
            if (!fileName.endsWith(".docx") || fileName.startsWith("~")) {
                continue;
            }
            processDocument(candidate);
        }
    }

    /** Opens one Word document, prints its charts and captions, and closes it afterwards. */
    private static void processDocument(File docxFile) throws IOException, InvalidFormatException {
        // try-with-resources guarantees both the stream and the document are released,
        // even when parsing fails part-way through.
        try (InputStream is = new FileInputStream(docxFile);
             XWPFDocument doc = new XWPFDocument(is)) {
            printCharts(doc);
            printFigureCaptions(doc);
        }
    }

    /** Prints the chart count and then the first sheet of each chart's embedded workbook. */
    private static void printCharts(XWPFDocument doc) throws IOException, InvalidFormatException {
        List<XWPFChart> charts = doc.getCharts();
        System.out.println("图表数量=" + charts.size());
        for (XWPFChart chart : charts) {
            XSSFWorkbook workbook = chart.getWorkbook();
            printSheet(workbook.getSheetAt(0));
        }
    }

    /**
     * Prints every defined row of the sheet, one output line per row, cells separated
     * by a space; undefined cells inside a row are printed as "null".
     */
    private static void printSheet(XSSFSheet sheet) {
        // getLastRowNum() is the index of the last defined row; bounding the loop by the
        // physical row count would drop trailing rows whenever the sheet has gaps.
        int lastRowIndex = sheet.getLastRowNum();
        for (int i = 0; i <= lastRowIndex; i++) {
            XSSFRow row = sheet.getRow(i);
            if (row == null) {
                continue;
            }
            // getLastCellNum() is the last cell index plus one (or -1 for a row without cells).
            int cellEnd = row.getLastCellNum();
            for (int j = 0; j < cellEnd; j++) {
                XSSFCell cell = row.getCell(j);
                if (cell != null) {
                    System.out.print(cell + " ");
                } else {
                    System.out.print("null ");
                }
            }
            System.out.println();
        }
    }

    /** Prints every body paragraph whose text starts with "图". */
    private static void printFigureCaptions(XWPFDocument doc) {
        List<XWPFParagraph> paragraphs = doc.getParagraphs();
        for (XWPFParagraph paragraph : paragraphs) {
            String text = paragraph.getText();
            if (text != null && text.startsWith("图")) {
                System.out.println(text);
            }
        }
    }
}