main
黄海 9 months ago
parent 753a094175
commit 04a27808ba

@ -1,5 +1,6 @@
package com.dsideal.base.Test;
import cn.hutool.core.io.FileUtil;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
@ -15,26 +16,30 @@ import java.util.List;
/**
 * Ad-hoc command-line utility that scans a hard-coded directory for Word
 * (.docx) files and prints, for each one, the number of embedded charts, the
 * cell contents of a sheet taken from each chart's embedded workbook, and
 * every paragraph whose text starts with "图".
 *
 * NOTE(review): this text appears to be a rendered diff, not a plain source
 * file. Lines from before and after the change are interleaved (for example
 * the two inputUrl declarations and the duplicated println / if / else
 * lines), and the "@ -a,b +c,d @" hunk headers mark places where code is
 * elided. Recover the real file before compiling or refactoring.
 */
public class ReadDoc {
//https://blog.csdn.net/a346736962/article/details/123037797
/**
 * Entry point: walks the configured directory and dumps chart data and
 * figure-caption paragraphs of every matching .docx file to standard output.
 *
 * @param args command-line arguments; not referenced in the visible code
 * @throws IOException declared for the file and document I/O performed here
 * @throws InvalidFormatException declared by the signature; the call that
 *         raises it is not identifiable from the visible lines
 */
public static void main(String[] args) throws IOException, InvalidFormatException {
String workingPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\16个州市报告2022\\分析报告20240510";
// Iterate over all files under workingPath
File file = new File(workingPath);
String[] fileList = file.list();
for (String fileName : fileList) {
// Check whether the entry is a directory; directories are skipped
File tempFile = new File(workingPath + "/" + fileName);
if (tempFile.isDirectory()) {
continue;
}
// The first assignment is immediately overwritten, so only the second path is actually scanned.
String directoryPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\16个州市报告2022\\分析报告20240510";
directoryPath="D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\133个县区报告2022";
// Iterate over all files under the directory; note that files in all subdirectories must be visited recursively
System.out.println("开始遍历目录下的所有文件");
// Call Hutool's FileUtil.loopFiles to recursively collect all files
List<File> files = FileUtil.loopFiles(directoryPath, file -> {
// A filter condition can be added here; return true when no filtering is needed
return true;
});
// Number of .docx files processed; reported at the end.
int cnt = 0;
// Original comment: "print the paths of all files". The loop below actually processes each matching .docx file.
for (File file : files) {
String fileName = file.getName();
// Check whether it is a .docx file; names starting with "~" are excluded
// (presumably Word's temporary lock files -- TODO confirm)
if (fileName.endsWith(".docx") && !fileName.startsWith("~")) {
cnt++;
// Read the file
String inputUrl = workingPath + "/" + fileName;
String inputUrl = file.getAbsolutePath();
// NOTE(review): no close() of this stream or of the document is visible in this view;
// consider try-with-resources if the elided lines do not close them.
InputStream is = new FileInputStream(inputUrl);
XWPFDocument doc = new XWPFDocument(is);
// Charts embedded in the document
List<XWPFChart> charts = doc.getCharts();
System.out.println("图表数量="+charts.size());
System.out.println("图表数量=" + charts.size());
for (XWPFChart chart : charts) {
// Each chart carries an embedded workbook holding its data.
XSSFWorkbook workbook = chart.getWorkbook();
@ -43,13 +48,13 @@ public class ReadDoc {
// NOTE(review): the line that obtains `sheet` is elided by the hunk boundary above.
for (int i = 0; i < sheet.getPhysicalNumberOfRows(); i++) {
// Iterate over rows
XSSFRow row = sheet.getRow(i);
if(row==null) continue;
if (row == null) continue;
for (int j = 0; j < row.getPhysicalNumberOfCells(); j++) {
// Iterate over columns (cells); a missing cell is printed as the text "null"
XSSFCell cell = row.getCell(j);
if (cell != null)
System.out.print(cell + " ");
else{
else {
System.out.print("null ");
}
}
@ -60,13 +65,13 @@ public class ReadDoc {
// Print every paragraph whose text starts with "图"
// (presumably figure captions -- TODO confirm).
List<XWPFParagraph> paragraphs = doc.getParagraphs();
for (XWPFParagraph paragraph : paragraphs) {
String text = paragraph.getText();
if(text.startsWith("图")){
if (text.startsWith("图")) {
System.out.println(text);
}
}
}
}
// Report how many .docx files were processed.
System.out.println("共读取" + cnt + "个文件");
}
}

Loading…
Cancel
Save