main
黄海 8 months ago
parent dca57c8cb8
commit 8037bd73b5

@ -1,5 +1,6 @@
package com.dsideal.base.Tools.FillData.Area;
import cn.hutool.core.io.FileUtil;
import com.alibaba.dashscope.aigc.generation.Generation;
import com.alibaba.dashscope.aigc.generation.GenerationResult;
import com.alibaba.dashscope.aigc.generation.models.QwenParam;
@ -9,8 +10,17 @@ import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.InputRequiredException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.utils.Constants;
import com.dsideal.base.DataEase.Model.DataEaseModel;
import com.dsideal.base.Tools.FillData.DataEaseKit.DsKit;
import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil;
import com.jfinal.kit.StrKit;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.dom4j.DocumentException;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
@ -24,6 +34,14 @@ import java.io.InputStream;
import java.util.List;
public class A15 {
// Source directory containing the county/district research reports that main() scans
static String parentPath = "D:\\dsWork\\YunNanDsBase\\Doc\\全省及州市县区人口与教育报告集20241023\\133个县区报告2022\\县区研究报告";
// File-name fragments for reports that cannot be processed; main() skips any file whose name contains one of them
static String[] excludeCityList = {"~$", "磨憨-磨丁", "经开区", "阳宗海"};
// Sample Excel file: its header is copied into the result sheet and its name is used to derive the output path
static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\区\\【15】发展规模预测\\教育规模发展情况与预测.xlsx";
/**
*
@ -52,8 +70,14 @@ public class A15 {
return text.replace("\n\n", "\n");
}
public static void main(String[] args) throws IOException, DocumentException, NoApiKeyException, InputRequiredException {
String filePath = "D:\\dsWork\\YunNanDsBase\\Doc\\县区研究报告\\保山市\\昌宁县人口变化及其对教育的影响.docx";
/**
*
*
* @param filePath
* @return
* @throws IOException
*/
public static List<String> getDocxJYFZXZ(String filePath) throws IOException, NoApiKeyException, InputRequiredException {
//使用POI读取上面word文档的所有文字信息
String content = "";
InputStream inputStream = new FileInputStream(filePath);
@ -74,6 +98,10 @@ public class A15 {
List<Integer> posList = new ArrayList<>();
for (String s : a) {
int pos = content.indexOf(s);
if (pos < 0) {
System.out.println(s + "在给定的文本中没有找到,程序无法继续,请人工检查!");
System.exit(-1);
}
posList.add(pos);
}
List<String> res = new ArrayList<>();
@ -81,13 +109,90 @@ public class A15 {
int start = posList.get(i) + a[i].length(), end = posList.get(i + 1);
res.add(content.substring(start, end));
}
for (String re : res) {
List<String> finalList = new ArrayList<>();
for (int i = 0; i < res.size(); i++) {
String re = res.get(i);
String question = "我将给你提供一大段文字,帮我总结下入园或招生情况,划分两个阶段:(1)过去几年入园、招生情况 (2)入园、招生未来发展预测。尽量保留文中原话一个阶段不要超过30字。不要使用markdown语法。";
question += "示例: 1. 与2022年相比寻甸县未来几年学前教育幼儿入园人数增长不大至2035年预计在6800左右仅增加几百人";
question += "2. 未来镇区幼儿入园人数将逐年增加,而乡村幼儿入园人数将明显减少。";
question += "下面是文字内容:";
question += re;
System.out.println(removeEmptyLines(callTongYiQianWen(question)));
finalList.add(removeEmptyLines(callTongYiQianWen(question)));
}
for (int i = 0; i < res.size(); i++) {
String re = res.get(i);
String question = "我将给你提供一大段文字,帮我总结下在园或在校生情况,划分两个阶段:(1)过去几年在园、在校生情况 (2)在园、在校生未来发展预测。尽量保留文中原话一个阶段不要超过30字。不要使用markdown语法。";
question += "示例: 1. 与2022年相比寻甸县未来几年学前教育幼儿在园人数将略有增长至2035年预计达到1.85万左右增加1700人左右";
question += "1. 与2022年相比寻甸县未来几年学前教育幼儿在园人数将略有增长至2035年预计达到1.85万左右增加1700人左右 " +
"2. 未来镇区幼儿在园人数也将持续增长,而乡村幼儿在园人数将明显减少,这与寻甸县持续的高城镇化率密切相关。 ";
question += "下面是文字内容:";
question += re;
finalList.add(removeEmptyLines(callTongYiQianWen(question)));
}
return finalList;
}
/**
 * Batch entry point: scans the report files under {@code parentPath}, summarises each eligible
 * .docx report with {@link #getDocxJYFZXZ(String)} and writes two rows per county/district
 * ("入园入校" and "在园在校") into a result workbook derived from {@code sampleExcelPath}.
 *
 * <p>The run aborts with a non-zero exit status when a file cannot be mapped to a city/area
 * name or when the summariser does not return the eight entries the two rows need; nothing is
 * saved in that case.
 *
 * @param args unused
 * @throws IOException            propagated from reading the reports
 * @throws DocumentException      declared for the helpers used here
 * @throws NoApiKeyException      propagated from the summarisation call
 * @throws InputRequiredException propagated from the summarisation call
 */
public static void main(String[] args) throws IOException, DocumentException, NoApiKeyException, InputRequiredException {
    // Number of summaries consumed per report: 4 columns for each of the 2 output rows
    final int expectedSummaries = 8;

    // Initialise the database connection
    LocalMysqlConnectUtil.Init();
    // Result workbook and sheet
    XSSFWorkbook outWorkbook = new XSSFWorkbook();
    XSSFSheet outSheet = DsKit.createSheet(outWorkbook);
    // Cell styles
    XSSFCellStyle headerStyle = DsKit.getHeaderStyle(outWorkbook);
    XSSFCellStyle dataStyle = DsKit.getDataStyle(outWorkbook);
    // If the sample file is in xls format, convert it to xlsx
    sampleExcelPath = DsKit.convertXlsToXlsx(sampleExcelPath);
    // Copy the header from the sample file
    DsKit.copyHead(sampleExcelPath, outSheet, headerStyle);
    // Target Excel path: the sample file name with 【成果】 appended, computed rather than hard-coded
    String excelPath = sampleExcelPath.replace(".xlsx", "【成果】.xlsx");
    DsKit.delExcel(excelPath);
    // Collect the files under parentPath; the filter accepts every file
    List<File> files = FileUtil.loopFiles(parentPath, file -> true);
    // Pre-incremented before each createRow, so the first data row is index 1
    int rowIndex = 0;
    if (files != null) {
        for (File file : files) {
            // Directories are skipped
            if (file.isDirectory()) {
                continue;
            }
            String fileName = file.getName();
            // Only .docx reports are handled; names starting with "~" are skipped
            if (!fileName.endsWith(".docx") || fileName.startsWith("~")) {
                continue;
            }
            // Skip reports whose name contains one of the excluded fragments
            if (Arrays.stream(excludeCityList).anyMatch(fileName::contains)) {
                continue;
            }
            // County/district name
            DataEaseModel dm = new DataEaseModel();
            String areaName = dm.getAreaName(fileName);
            // City/prefecture name
            String cityName = dm.getCityNameByAreaName(areaName);
            if (StrKit.isBlank(cityName) || StrKit.isBlank(areaName)) {
                System.out.println("发现异常数据,请人工处理:" + fileName);
                // Non-zero status: this is a failure, matching the abort in getDocxJYFZXZ
                System.exit(-1);
            }
            System.out.println("正在进行" + cityName + "-" + areaName + "的数据填充~");
            // Summaries of the report's education-development sections
            List<String> list = getDocxJYFZXZ(file.getAbsolutePath());
            // Fail with the offending file name instead of a bare IndexOutOfBoundsException below
            if (list == null || list.size() < expectedSummaries) {
                int actual = (list == null) ? 0 : list.size();
                System.out.println("摘要数量不足(期望" + expectedSummaries + "条,实际" + actual + "条),请人工处理:" + fileName);
                System.exit(-1);
            }
            Row outRow = outSheet.createRow(++rowIndex);
            DsKit.putData(outRow, Arrays.asList(cityName, areaName, "入园入校", list.get(0), list.get(1), list.get(2), list.get(3)), dataStyle);
            outRow = outSheet.createRow(++rowIndex);
            DsKit.putData(outRow, Arrays.asList(cityName, areaName, "在园在校", list.get(4), list.get(5), list.get(6), list.get(7)), dataStyle);
        }
    }
    // Save the result file
    DsKit.saveExcel(excelPath, outWorkbook);
    System.out.println("县区所有文件处理完成!");
}
}
Loading…
Cancel
Save