From 87f1668bf347201485778b4621fd38eedba581a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=BB=84=E6=B5=B7?= <10402852@qq.com>
Date: Sun, 10 Nov 2024 09:13:32 +0800
Subject: [PATCH] 'commit'

---
Note: ReadCityDoc.java below was revised during review (resources are closed,
generic types restored, sparse sheets handled). The blob id on its "index"
line is the one recorded by the original commit.

 .../com/dsideal/base/Tools/ReadCityDoc.java   | 115 +++++++++++++++++++
 .../dsideal/base/{Test => Tools}/ReadDoc.java |   7 +-
 2 files changed, 120 insertions(+), 2 deletions(-)
 create mode 100644 src/main/java/com/dsideal/base/Tools/ReadCityDoc.java
 rename src/main/java/com/dsideal/base/{Test => Tools}/ReadDoc.java (94%)

diff --git a/src/main/java/com/dsideal/base/Tools/ReadCityDoc.java b/src/main/java/com/dsideal/base/Tools/ReadCityDoc.java
new file mode 100644
index 00000000..3009563e
--- /dev/null
+++ b/src/main/java/com/dsideal/base/Tools/ReadCityDoc.java
@@ -0,0 +1,115 @@
+package com.dsideal.base.Tools;
+
+import cn.hutool.core.io.FileUtil;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.util.ZipSecureFile;
+import org.apache.poi.ss.usermodel.CellType;
+import org.apache.poi.ss.usermodel.CellValue;
+import org.apache.poi.ss.usermodel.FormulaEvaluator;
+import org.apache.poi.xssf.usermodel.*;
+import org.apache.poi.xwpf.usermodel.XWPFChart;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+
+/**
+ * Command-line tool that walks a report directory, opens every .docx file in it and prints
+ * the chart data and the paragraphs starting with "图" found in each document.
+ *
+ * <p>Reference: https://blog.csdn.net/a346736962/article/details/123037797
+ */
+public class ReadCityDoc {
+    /**
+     * Directory scanned when no argument is given (133 county/district reports). The 16
+     * prefecture/city reports are under "16个州市报告2022\\分析报告20240510" next to it.
+     */
+    private static final String DEFAULT_DIRECTORY =
+            "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\133个县区报告2022";
+
+    /**
+     * Scans the directory recursively and dumps every .docx report found.
+     *
+     * @param args optional; {@code args[0]} overrides the directory to scan
+     * @throws IOException            if a file cannot be read
+     * @throws InvalidFormatException if a chart's embedded workbook cannot be opened
+     */
+    public static void main(String[] args) throws IOException, InvalidFormatException {
+        String directoryPath = args.length > 0 ? args[0] : DEFAULT_DIRECTORY;
+        // Reading the embedded Excel data otherwise fails with
+        // "Zip bomb detected! The file would exceed the max. ..."; the setting is global,
+        // so apply it once. https://blog.csdn.net/baidu_19473529/article/details/109601558
+        ZipSecureFile.setMinInflateRatio(-1.0d);
+
+        System.out.println("开始遍历目录下的所有文件");
+        // Hutool's FileUtil.loopFiles recurses into subdirectories; the filter accepts all.
+        List<File> files = FileUtil.loopFiles(directoryPath, file -> true);
+        int cnt = 0;
+        for (File file : files) {
+            String fileName = file.getName();
+            // Only .docx files; "~"-prefixed names are presumably Word's temporary files.
+            if (!fileName.endsWith(".docx") || fileName.startsWith("~")) {
+                continue;
+            }
+            cnt++;
+            // try-with-resources: release the stream and document before the next file.
+            try (InputStream is = new FileInputStream(file);
+                 XWPFDocument doc = new XWPFDocument(is)) {
+                printCharts(doc);
+                printFigureParagraphs(doc);
+            }
+        }
+        System.out.println("共读取" + cnt + "个文件");
+    }
+
+    /** Prints the first sheet of the workbook behind every chart embedded in {@code doc}. */
+    private static void printCharts(XWPFDocument doc) throws IOException, InvalidFormatException {
+        List<XWPFChart> charts = doc.getCharts();
+        System.out.println("图表数量=" + charts.size());
+        for (XWPFChart chart : charts) {
+            XSSFWorkbook workbook = chart.getWorkbook();
+            // One evaluator per workbook instead of one per cell.
+            FormulaEvaluator evaluator = new XSSFFormulaEvaluator(workbook);
+            printSheet(workbook.getSheetAt(0), evaluator);
+        }
+    }
+
+    /** Prints {@code sheet} row by row, also printing the computed value of formula cells. */
+    private static void printSheet(XSSFSheet sheet, FormulaEvaluator evaluator) {
+        // Iterate up to the last row/cell index. The "physical" counts only count entries
+        // that exist, so using them as bounds drops trailing data when a sheet has gaps.
+        for (int i = 0; i <= sheet.getLastRowNum(); i++) {
+            XSSFRow row = sheet.getRow(i);
+            if (row == null) {
+                continue;
+            }
+            for (int j = 0; j < row.getLastCellNum(); j++) {
+                XSSFCell cell = row.getCell(j);
+                if (cell == null) {
+                    System.out.print("null ");
+                    continue;
+                }
+                if (cell.getCellType() == CellType.FORMULA) {
+                    CellValue evaluatedValue = evaluator.evaluate(cell);
+                    System.out.println("Calculated Value: " + evaluatedValue.formatAsString());
+                }
+                System.out.print(cell + " ");
+            }
+            System.out.println();
+        }
+    }
+
+    /** Prints every paragraph of {@code doc} whose text starts with "图". */
+    private static void printFigureParagraphs(XWPFDocument doc) {
+        for (XWPFParagraph paragraph : doc.getParagraphs()) {
+            String text = paragraph.getText();
+            if (text.startsWith("图")) {
+                System.out.println(text);
+            }
+        }
+    }
+}
diff --git a/src/main/java/com/dsideal/base/Test/ReadDoc.java b/src/main/java/com/dsideal/base/Tools/ReadDoc.java
similarity index 94%
rename from src/main/java/com/dsideal/base/Test/ReadDoc.java
rename to src/main/java/com/dsideal/base/Tools/ReadDoc.java
index ed6d3c64..1b35b4a6 100644
--- a/src/main/java/com/dsideal/base/Test/ReadDoc.java
+++ b/src/main/java/com/dsideal/base/Tools/ReadDoc.java
@@ -1,4 +1,4 @@
-package com.dsideal.base.Test;
+package com.dsideal.base.Tools;
 
 import cn.hutool.core.io.FileUtil;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
@@ -11,7 +11,10 @@ import org.apache.poi.xwpf.usermodel.XWPFChart;
 import org.apache.poi.xwpf.usermodel.XWPFDocument;
 import org.apache.poi.xwpf.usermodel.XWPFParagraph;
 
-import java.io.*;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
 import java.util.List;
 
 public class ReadDoc {