diff --git a/src/main/java/com/dsideal/base/Test/ReadDoc.java b/src/main/java/com/dsideal/base/Test/ReadDoc.java index 63e30891..83162b82 100644 --- a/src/main/java/com/dsideal/base/Test/ReadDoc.java +++ b/src/main/java/com/dsideal/base/Test/ReadDoc.java @@ -1,5 +1,6 @@ package com.dsideal.base.Test; +import cn.hutool.core.io.FileUtil; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.xssf.usermodel.XSSFCell; import org.apache.poi.xssf.usermodel.XSSFRow; @@ -15,26 +16,30 @@ import java.util.List; public class ReadDoc { //https://blog.csdn.net/a346736962/article/details/123037797 public static void main(String[] args) throws IOException, InvalidFormatException { - String workingPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\16个州市报告2022\\分析报告20240510"; - //遍历workingPath下的所有文件 - File file = new File(workingPath); - String[] fileList = file.list(); - for (String fileName : fileList) { - //判断是否为文件夹 - File tempFile = new File(workingPath + "/" + fileName); - if (tempFile.isDirectory()) { - continue; - } + String directoryPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\16个州市报告2022\\分析报告20240510"; + directoryPath="D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\133个县区报告2022"; + //遍历workingPath下的所有文件,注意,需要递归所有子目录下的所有文件 + System.out.println("开始遍历目录下的所有文件"); + // 调用 Hutool 的 FileUtil.loopFiles 方法递归获取所有文件 + List files = FileUtil.loopFiles(directoryPath, file -> { + // 这里可以添加你的过滤条件,如果不需要过滤,返回 true 即可 + return true; + }); + int cnt = 0; + // 打印所有文件的路径 + for (File file : files) { + String fileName = file.getName(); //判断是否为docx文件 if (fileName.endsWith(".docx") && !fileName.startsWith("~")) { + cnt++; //读取文件 - String inputUrl = workingPath + "/" + fileName; + String inputUrl = file.getAbsolutePath(); InputStream is = new FileInputStream(inputUrl); XWPFDocument doc = new XWPFDocument(is); //图表 List charts = doc.getCharts(); - System.out.println("图表数量="+charts.size()); + System.out.println("图表数量=" + charts.size()); for (XWPFChart chart : charts) { XSSFWorkbook workbook = chart.getWorkbook(); @@ -43,13 +48,13 @@ public class ReadDoc { for (int i = 0; i < sheet.getPhysicalNumberOfRows(); i++) { //遍历行 XSSFRow row = sheet.getRow(i); - if(row==null) continue; + if (row == null) continue; for (int j = 0; j < row.getPhysicalNumberOfCells(); j++) { //遍历列 XSSFCell cell = row.getCell(j); if (cell != null) System.out.print(cell + " "); - else{ + else { System.out.print("null "); } } @@ -60,13 +65,13 @@ public class ReadDoc { List paragraphs = doc.getParagraphs(); for (XWPFParagraph paragraph : paragraphs) { String text = paragraph.getText(); - if(text.startsWith("图")){ + if (text.startsWith("图")) { System.out.println(text); } } } } - + System.out.println("共读取" + cnt + "个文件"); } }