|
|
|
@ -15,42 +15,58 @@ import java.util.List;
|
|
|
|
|
public class ReadDoc {
|
|
|
|
|
//https://blog.csdn.net/a346736962/article/details/123037797
|
|
|
|
|
public static void main(String[] args) throws IOException, InvalidFormatException {
|
|
|
|
|
final String inputUrl = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\133个县区报告2022\\县区研究报告\\保山市各县区报告5\\昌宁县人口变化及其对教育的影响.docx";
|
|
|
|
|
InputStream is = new FileInputStream(inputUrl);
|
|
|
|
|
XWPFDocument doc = new XWPFDocument(is);
|
|
|
|
|
//图表
|
|
|
|
|
List<XWPFChart> charts = doc.getCharts();
|
|
|
|
|
String workingPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\16个州市报告2022\\分析报告20240510";
|
|
|
|
|
//遍历workingPath下的所有文件
|
|
|
|
|
File file = new File(workingPath);
|
|
|
|
|
String[] fileList = file.list();
|
|
|
|
|
for (String fileName : fileList) {
|
|
|
|
|
//判断是否为文件夹
|
|
|
|
|
File tempFile = new File(workingPath + "/" + fileName);
|
|
|
|
|
if (tempFile.isDirectory()) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
//判断是否为docx文件
|
|
|
|
|
if (fileName.endsWith(".docx") && !fileName.startsWith("~")) {
|
|
|
|
|
//读取文件
|
|
|
|
|
String inputUrl = workingPath + "/" + fileName;
|
|
|
|
|
InputStream is = new FileInputStream(inputUrl);
|
|
|
|
|
XWPFDocument doc = new XWPFDocument(is);
|
|
|
|
|
//图表
|
|
|
|
|
List<XWPFChart> charts = doc.getCharts();
|
|
|
|
|
|
|
|
|
|
System.out.println("图表数量="+charts.size());
|
|
|
|
|
System.out.println("图表数量="+charts.size());
|
|
|
|
|
|
|
|
|
|
for (XWPFChart chart : charts) {
|
|
|
|
|
XSSFWorkbook workbook = chart.getWorkbook();
|
|
|
|
|
XSSFSheet sheet = workbook.getSheetAt(0);
|
|
|
|
|
//遍历一下sheet
|
|
|
|
|
for (int i = 0; i < sheet.getPhysicalNumberOfRows(); i++) {
|
|
|
|
|
//遍历行
|
|
|
|
|
XSSFRow row = sheet.getRow(i);
|
|
|
|
|
if(row==null) continue;
|
|
|
|
|
for (int j = 0; j < row.getPhysicalNumberOfCells(); j++) {
|
|
|
|
|
//遍历列
|
|
|
|
|
XSSFCell cell = row.getCell(j);
|
|
|
|
|
if (cell != null)
|
|
|
|
|
System.out.print(cell + " ");
|
|
|
|
|
else{
|
|
|
|
|
System.out.print("null ");
|
|
|
|
|
for (XWPFChart chart : charts) {
|
|
|
|
|
XSSFWorkbook workbook = chart.getWorkbook();
|
|
|
|
|
XSSFSheet sheet = workbook.getSheetAt(0);
|
|
|
|
|
//遍历一下sheet
|
|
|
|
|
for (int i = 0; i < sheet.getPhysicalNumberOfRows(); i++) {
|
|
|
|
|
//遍历行
|
|
|
|
|
XSSFRow row = sheet.getRow(i);
|
|
|
|
|
if(row==null) continue;
|
|
|
|
|
for (int j = 0; j < row.getPhysicalNumberOfCells(); j++) {
|
|
|
|
|
//遍历列
|
|
|
|
|
XSSFCell cell = row.getCell(j);
|
|
|
|
|
if (cell != null)
|
|
|
|
|
System.out.print(cell + " ");
|
|
|
|
|
else{
|
|
|
|
|
System.out.print("null ");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
System.out.println();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
//段落
|
|
|
|
|
List<XWPFParagraph> paragraphs = doc.getParagraphs();
|
|
|
|
|
for (XWPFParagraph paragraph : paragraphs) {
|
|
|
|
|
String text = paragraph.getText();
|
|
|
|
|
if(text.startsWith("图")){
|
|
|
|
|
System.out.println(text);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
System.out.println();
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
//段落
|
|
|
|
|
List<XWPFParagraph> paragraphs = doc.getParagraphs();
|
|
|
|
|
for (XWPFParagraph paragraph : paragraphs) {
|
|
|
|
|
String text = paragraph.getText();
|
|
|
|
|
if(text.startsWith("图")){
|
|
|
|
|
System.out.println(text);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|