main
黄海 9 months ago
parent 87f1668bf3
commit c511ce4b7c

@ -1,93 +1,132 @@
package com.dsideal.base.Tools; package com.dsideal.base.Tools;
import cn.hutool.core.io.FileUtil; import com.dsideal.base.DataEase.Model.DataEaseModel;
import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.util.ZipSecureFile; import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.CellValue;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.xssf.usermodel.*; import org.apache.poi.xssf.usermodel.*;
import org.apache.poi.xwpf.usermodel.XWPFChart; import org.apache.poi.xwpf.usermodel.XWPFChart;
import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFFootnote;
import org.apache.poi.xwpf.usermodel.XWPFParagraph; import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import java.io.File; import java.io.*;
import java.io.FileInputStream; import java.util.ArrayList;
import java.io.IOException;
import java.io.InputStream;
import java.util.List; import java.util.List;
import com.jfinal.plugin.activerecord.Record;
public class ReadCityDoc { public class ReadCityDoc {
//https://blog.csdn.net/a346736962/article/details/123037797 public static DataEaseModel dm = new DataEaseModel();
/**
 * Lists the names of the cities/prefectures directly under Yunnan Province.
 *
 * <p>The province row is looked up by name through {@code dm}, its {@code id}
 * column is used to fetch the child areas, and the {@code area_name} column
 * of each child row is collected in the order the rows are returned.
 *
 * @return a new mutable list holding one {@code area_name} per child area
 */
public static List<String> getCityNameList() {
    // The province id keys the child-area query.
    String provinceId = dm.getAreaByName("云南省").getStr("id");
    List<Record> areas = dm.getAreaList(provinceId);

    List<String> names = new ArrayList<>();
    areas.forEach(area -> names.add(area.getStr("area_name")));
    return names;
}
/**
 * Determines which city a report file belongs to by searching its file name
 * for each name returned by {@link #getCityNameList()}.
 *
 * <p>If more than one city name occurs in {@code fileName}, the one that comes
 * last in the list wins. If none occurs, the method prints
 * "未找到匹配的城市" to standard error and terminates the JVM with exit status 1,
 * so that calling scripts can detect the failure.
 *
 * @param fileName file name to search; must not be {@code null}
 * @return the city name contained in {@code fileName}
 */
public static String getCityName(String fileName) {
    String cityName = null;
    // No early break: keeps the "last match wins" behaviour.
    for (String candidate : getCityNameList()) {
        if (fileName.contains(candidate)) {
            cityName = candidate;
        }
    }
    if (cityName == null) {
        System.err.println("未找到匹配的城市");
        // Non-zero status: this is an abnormal termination, not a success.
        System.exit(1);
    }
    return cityName;
}
public static void main(String[] args) throws IOException, InvalidFormatException { public static void main(String[] args) throws IOException, InvalidFormatException {
String directoryPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\16个州市报告2022\\分析报告20240510"; //初始化数据库连接
directoryPath="D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\133个县区报告2022"; LocalMysqlConnectUtil.Init();
//遍历workingPath下的所有文件,注意,需要递归所有子目录下的所有文件
System.out.println("开始遍历目录下的所有文件");
// 调用 Hutool 的 FileUtil.loopFiles 方法递归获取所有文件
List<File> files = FileUtil.loopFiles(directoryPath, file -> {
// 这里可以添加你的过滤条件,如果不需要过滤,返回 true 即可
return true;
});
int cnt = 0;
// 打印所有文件的路径
for (File file : files) {
String fileName = file.getName();
//判断是否为docx文件
if (fileName.endsWith(".docx") && !fileName.startsWith("~")) {
cnt++;
//读取文件
String inputUrl = file.getAbsolutePath();
InputStream is = new FileInputStream(inputUrl);
//读取excel报错 Zip bomb detected! The file would exceed the max.
//https://blog.csdn.net/baidu_19473529/article/details/109601558
ZipSecureFile.setMinInflateRatio(-1.0d);
XWPFDocument doc = new XWPFDocument(is);
//图表
List<XWPFChart> charts = doc.getCharts();
System.out.println("图表数量=" + charts.size());
for (XWPFChart chart : charts) { //开始读取word文档
XSSFWorkbook workbook = chart.getWorkbook(); String parentPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\16个州市报告2022";
XSSFSheet sheet = workbook.getSheetAt(0); //在parentPath下检查是不是存在Excel的目录,如果不存在则创建
//遍历一下sheet File excelDir = new File(parentPath + "\\Excel");
for (int i = 0; i < sheet.getPhysicalNumberOfRows(); i++) { if (!excelDir.exists()) {
//遍历行 excelDir.mkdir();
XSSFRow row = sheet.getRow(i); }
if (row == null) continue;
for (int j = 0; j < row.getPhysicalNumberOfCells(); j++) {
//遍历列
XSSFCell cell = row.getCell(j);
// 创建公式计算器
FormulaEvaluator evaluator = new XSSFFormulaEvaluator(workbook);
// 检查单元格是否包含公式 String fileStr = parentPath + "\\分析报告20240510\\昆明市人口变化及其对教育的影响20240419.docx";
if (cell!=null && cell.getCellType() == CellType.FORMULA) { File file = new File(fileStr);
// 计算公式并获取结果 String fileName = file.getName();
CellValue evaluatedValue = evaluator.evaluate(cell); //城市名称
System.out.println("Calculated Value: " + evaluatedValue.formatAsString()); String cityName = getCityName(fileName);
} //在excelDir下检查是不是存在cityName的文件夹不存在则创建
if (cell != null) File cityDir = new File(excelDir.getAbsolutePath() + "\\" + cityName);
System.out.print(cell + " "); if (!cityDir.exists()) {
else { cityDir.mkdir();
System.out.print("null "); }
}
}
System.out.println();
}
}
//段落
List<XWPFParagraph> paragraphs = doc.getParagraphs();
for (XWPFParagraph paragraph : paragraphs) {
String text = paragraph.getText();
if (text.startsWith("图")) {
System.out.println(text);
}
//判断是否为docx文件
if (fileName.endsWith(".docx") && !fileName.startsWith("~")) {
//读取文件
String inputUrl = file.getAbsolutePath();
InputStream is = new FileInputStream(inputUrl);
ZipSecureFile.setMinInflateRatio(-1.0d);
XWPFDocument doc = new XWPFDocument(is);
//段落
int num = 0;
List<String> tbList = new ArrayList<>();
List<XWPFParagraph> paragraphs = doc.getParagraphs();
for (XWPFParagraph paragraph : paragraphs) {
String text = paragraph.getText();
// 检查段落是否包含脚注
String footnotes = paragraph.getFootnoteText();
//删除掉text中脚注的文字
text = text.replace(footnotes, "");
text=text.replace("[footnoteRef:0]","");
if (text.startsWith("图")) {
num++;
String[] a = text.split(" ");
//最后一个字符串
String lastStr = "【" + num + "】" + a[a.length - 1];
tbList.add(lastStr);
} }
} }
//图表
List<XWPFChart> charts = doc.getCharts();
//System.out.println("图表数量=" + charts.size() + ",图例个数=" + num);
for (int i = 0; i < charts.size(); i++) {
XWPFChart chart = charts.get(i);
XSSFWorkbook workbook = chart.getWorkbook();
// 创建FileOutputStream对象用于写入新的Excel文件
FileOutputStream outputStream = new FileOutputStream(cityDir.getAbsolutePath() + "\\" + tbList.get(i) + ".xlsx");
// 将工作簿写入到输出流中
workbook.write(outputStream);
// 关闭输出流和工作簿
outputStream.close();
workbook.close();
//输出完成信息
System.out.println("输出" + tbList.get(i) + ".xlsx完成");
}
} }
System.out.println("共读取" + cnt + "个文件");
} }
} }

Loading…
Cancel
Save