diff --git a/Doc/待处理/区/【15】发展规模预测/教育规模发展情况与预测.xlsx b/Doc/待处理/区/【15】发展规模预测/教育规模发展情况与预测.xlsx new file mode 100644 index 00000000..eba5cd75 Binary files /dev/null and b/Doc/待处理/区/【15】发展规模预测/教育规模发展情况与预测.xlsx differ diff --git a/src/main/java/com/dsideal/base/Tools/FillData/Area/A15.java b/src/main/java/com/dsideal/base/Tools/FillData/Area/A15.java index 68b40582..32780005 100644 --- a/src/main/java/com/dsideal/base/Tools/FillData/Area/A15.java +++ b/src/main/java/com/dsideal/base/Tools/FillData/Area/A15.java @@ -1,5 +1,6 @@ package com.dsideal.base.Tools.FillData.Area; +import cn.hutool.core.io.FileUtil; import com.alibaba.dashscope.aigc.generation.Generation; import com.alibaba.dashscope.aigc.generation.GenerationResult; import com.alibaba.dashscope.aigc.generation.models.QwenParam; @@ -9,8 +10,17 @@ import com.alibaba.dashscope.exception.ApiException; import com.alibaba.dashscope.exception.InputRequiredException; import com.alibaba.dashscope.exception.NoApiKeyException; import com.alibaba.dashscope.utils.Constants; +import com.dsideal.base.DataEase.Model.DataEaseModel; +import com.dsideal.base.Tools.FillData.DataEaseKit.DsKit; +import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil; +import com.jfinal.kit.StrKit; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.xssf.usermodel.XSSFCellStyle; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.dom4j.DocumentException; +import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; @@ -24,6 +34,14 @@ import java.io.InputStream; import java.util.List; public class A15 { + //Source files: root directory of the county and district research reports that main scans + static String parentPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\133个县区报告2022\\县区研究报告"; + + //File name fragments that cannot be handled; any file whose name contains one of them is skipped + static String[] excludeCityList = {"~$", "磨憨-磨丁", "经开区", "阳宗海"}; + //Sample Excel workbook: its header is copied into the result sheet and its path is used to derive the output path + static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\区\\【15】发展规模预测\\教育规模发展情况与预测.xlsx"; + /** * 调用通义千问 @@ 
-52,8 +70,14 @@ public class A15 { return text.replace("\n\n", "\n"); } - public static void main(String[] args) throws IOException, DocumentException, NoApiKeyException, InputRequiredException { - String filePath = "D:\\dsWork\\YunNanDsBase\\Doc\\县区研究报告\\保山市\\昌宁县人口变化及其对教育的影响.docx"; + /** + * Gets the current state of education development for the four stages from the given document. + * + * @param filePath path of the document to read + * @return the answers of callTongYiQianWen passed through removeEmptyLines: first one admission summary per extracted section, then one in-school summary per extracted section + * @throws IOException if the file at filePath cannot be opened + */ + public static List getDocxJYFZXZ(String filePath) throws IOException, NoApiKeyException, InputRequiredException { //使用POI读取上面word文档的所有文字信息 String content = ""; InputStream inputStream = new FileInputStream(filePath); @@ -74,6 +98,10 @@ public class A15 { List posList = new ArrayList<>(); for (String s : a) { int pos = content.indexOf(s); + if (pos < 0) { + System.out.println(s + "在给定的文本中没有找到,程序无法继续,请人工检查!"); + System.exit(-1); + } posList.add(pos); } List res = new ArrayList<>(); @@ -81,13 +109,90 @@ public class A15 { int start = posList.get(i) + a[i].length(), end = posList.get(i + 1); res.add(content.substring(start, end)); } - for (String re : res) { + List finalList = new ArrayList<>(); + for (int i = 0; i < res.size(); i++) { + String re = res.get(i); String question = "我将给你提供一大段文字,帮我总结下入园或招生情况,划分两个阶段:(1)过去几年入园、招生情况 (2)入园、招生未来发展预测。尽量保留文中原话,一个阶段不要超过30字。不要使用markdown语法。"; question += "示例: 1. 与2022年相比,寻甸县未来几年,学前教育幼儿入园人数增长不大,至2035年预计在6800左右,仅增加几百人"; question += "2. 未来镇区幼儿入园人数将逐年增加,而乡村幼儿入园人数将明显减少。"; question += "下面是文字内容:"; question += re; - System.out.println(removeEmptyLines(callTongYiQianWen(question))); + finalList.add(removeEmptyLines(callTongYiQianWen(question))); + } + + for (int i = 0; i < res.size(); i++) { + String re = res.get(i); + String question = "我将给你提供一大段文字,帮我总结下在园或在校生情况,划分两个阶段:(1)过去几年在园、在校生情况 (2)在园、在校生未来发展预测。尽量保留文中原话,一个阶段不要超过30字。不要使用markdown语法。"; + question += "示例: 1. 与2022年相比,寻甸县未来几年,学前教育幼儿在园人数将略有增长,至2035年预计达到1.85万左右,增加1700人左右;"; + question += "1. 与2022年相比,寻甸县未来几年,学前教育幼儿在园人数将略有增长,至2035年预计达到1.85万左右,增加1700人左右; " + + "2. 
未来镇区幼儿在园人数也将持续增长,而乡村幼儿在园人数将明显减少,这与寻甸县持续的高城镇化率密切相关。 "; + question += "下面是文字内容:"; + question += re; + finalList.add(removeEmptyLines(callTongYiQianWen(question))); + } + return finalList; + } + + public static void main(String[] args) throws IOException, DocumentException, NoApiKeyException, InputRequiredException { + //初始化数据库连接 + LocalMysqlConnectUtil.Init(); + + //结果Excel + XSSFWorkbook outWorkbook = new XSSFWorkbook(); + //结果Sheet + XSSFSheet outSheet = DsKit.createSheet(outWorkbook); + //样式 + XSSFCellStyle headerStyle = DsKit.getHeaderStyle(outWorkbook); + XSSFCellStyle dataStyle = DsKit.getDataStyle(outWorkbook); + //如果样例文件是xls格式,则转化为xlsx格式 + sampleExcelPath = DsKit.convertXlsToXlsx(sampleExcelPath); + //拷贝文件头 + DsKit.copyHead(sampleExcelPath, outSheet, headerStyle); + + //目标Excel,就是把文件名解析出来后,后面添加上【成果】,需要动态计算获取,不能写死 + String excelPath = sampleExcelPath.replace(".xlsx", "【成果】.xlsx"); + DsKit.delExcel(excelPath); + + //找到parentPath下一级目录中所有文件 + List files = FileUtil.loopFiles(parentPath, file -> true); + int rowIndex = 0; + //处理这个目录 + if (files != null) { + for (File file : files) { + //判断file是不是目录,是目录的需要跳过 + if (file.isDirectory()) continue; + if (!file.getName().endsWith(".docx") || file.getName().startsWith("~")) + continue; + boolean flag = false; + for (String s : excludeCityList) { + if (file.getName().contains(s)) { + flag = true; + break; + } + } + if (flag) continue; + //县区名称 + DataEaseModel dm = new DataEaseModel(); + String areaName = dm.getAreaName(file.getName()); + //市州名称 + String cityName = dm.getCityNameByAreaName(areaName); + + if (StrKit.isBlank(cityName) || StrKit.isBlank(areaName)) { + System.out.println("发现异常数据,请人工处理:" + file.getName()); + System.exit(0); + } + //县区名称 + System.out.println("正在进行" + cityName + "-" + areaName + "的数据填充~"); + //获取教育发展现状态 + List list = getDocxJYFZXZ(file.getAbsolutePath()); + Row outRow = outSheet.createRow(++rowIndex); + DsKit.putData(outRow, Arrays.asList(cityName, areaName, "入园入校", list.get(0), list.get(1), 
list.get(2), list.get(3)), dataStyle); + outRow = outSheet.createRow(++rowIndex); + DsKit.putData(outRow, Arrays.asList(cityName, areaName, "在园在校", list.get(4), list.get(5), list.get(6), list.get(7)), dataStyle); + } } + //Save the result workbook to excelPath + DsKit.saveExcel(excelPath, outWorkbook); + System.out.println("县区所有文件处理完成!"); } } \ No newline at end of file