parent
aad1bc683a
commit
12f92f8d71
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,171 @@
|
||||
package com.dsideal.base.Tools.FillData.KaiFaArea;
|
||||
|
||||
import com.alibaba.dashscope.exception.InputRequiredException;
|
||||
import com.alibaba.dashscope.exception.NoApiKeyException;
|
||||
import com.dsideal.base.Tools.FillData.DataEaseKit.DsKit;
|
||||
import com.jfinal.kit.StrKit;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class A14 {
|
||||
// Sample Excel workbook
|
||||
// Absolute path of the workbook that main() opens for reading and later overwrites in place.
static String sampleExcelPath = "D:\\dsWork\\YunNanDsBase\\Doc\\待处理\\区\\【14】基本县情\\2023年基本县情【成果】.xlsx";
|
||||
|
||||
public static String getHTML(String url) throws IOException {
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
URL website = new URL(url);
|
||||
HttpURLConnection connection = (HttpURLConnection) website.openConnection();
|
||||
connection.setRequestMethod("GET");
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
stringBuilder.append(line);
|
||||
}
|
||||
reader.close();
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* 按县区名称获取面积和镇乡信息
|
||||
*
|
||||
* @param areaName
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public static List<String> getXq(String areaName) throws IOException {
|
||||
List<String> list = new ArrayList<>();
|
||||
String url = "https://baike.baidu.com/item/" + areaName + "?fromModule=lemma_search-box";
|
||||
String htmlContent = getHTML(url);
|
||||
//System.out.println(htmlContent);
|
||||
// 从字符串解析HTML
|
||||
Document doc = Jsoup.parse(htmlContent);
|
||||
Elements dts = doc.select("dt");
|
||||
// 遍历所有dt标签
|
||||
for (Element dt : dts) {
|
||||
if (dt.text().equals("下辖地区")) {
|
||||
// 使用正则表达式替换掉以[]包含的部分
|
||||
String output = dt.nextElementSibling().text().replaceAll("\\[.*?\\]", "");
|
||||
output = output.trim();
|
||||
list.add(output);
|
||||
}
|
||||
if (dt.text().replace(" ", "").equals("面积")) {
|
||||
String output = dt.nextElementSibling().text().replaceAll("\\[.*?\\]", "");
|
||||
output = output.replace("km²", "");
|
||||
output = output.trim();
|
||||
list.add(output);
|
||||
}
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
public static List<Integer> extractTownAndVillage(String input) {
|
||||
List<Integer> counts = new ArrayList<>();
|
||||
Pattern pattern = Pattern.compile("(\\d+)个镇|(\\d+)镇|(\\d+)个乡|(\\d+)乡");
|
||||
Matcher matcher = pattern.matcher(input);
|
||||
|
||||
int townCount = 0;
|
||||
int villageCount = 0;
|
||||
|
||||
while (matcher.find()) {
|
||||
if (matcher.group(1) != null || matcher.group(2) != null) {
|
||||
townCount = Integer.parseInt(matcher.group(0).replace("镇", "").replace("乡", "").replace("个", ""));
|
||||
}
|
||||
if (matcher.group(3) != null || matcher.group(4) != null) {
|
||||
villageCount = Integer.parseInt(matcher.group(0).replace("镇", "").replace("乡", "").replace("个", ""));
|
||||
}
|
||||
}
|
||||
|
||||
counts.add(townCount);
|
||||
counts.add(villageCount);
|
||||
return counts;
|
||||
}
|
||||
|
||||
/**
|
||||
* 统计字符串中特定汉字出现的次数
|
||||
*
|
||||
* @param input 要统计的字符串
|
||||
* @param character 要统计的汉字
|
||||
* @return 汉字出现的次数
|
||||
*/
|
||||
public static int countChineseCharacter(String input, String character) {
|
||||
if (input == null || character == null || character.length() != 1) {
|
||||
return 0;
|
||||
}
|
||||
int count = 0;
|
||||
for (int i = 0; i < input.length(); i++) {
|
||||
if (input.charAt(i) == character.charAt(0)) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws NoApiKeyException, InputRequiredException, IOException, InterruptedException {
|
||||
FileInputStream inputStream = new FileInputStream(sampleExcelPath);
|
||||
FileOutputStream outputStream;
|
||||
|
||||
Workbook workbook = new XSSFWorkbook(inputStream); // 打开工作簿
|
||||
Sheet sheet = workbook.getSheetAt(0); // 获取第一个工作表
|
||||
|
||||
// 读取数据
|
||||
int idx = 0;
|
||||
for (Row row : sheet) {
|
||||
idx++;
|
||||
if (idx == 1) continue;
|
||||
String cityName = DsKit.readCell(row.getCell(0)).replace(" ", "");
|
||||
String areaName = DsKit.readCell(row.getCell(1)).replace(" ", "");
|
||||
if (!StrKit.isBlank(areaName)) {
|
||||
System.out.println("正在查询县区:" + cityName + "\t" + areaName);
|
||||
List<String> list = getXq(areaName);
|
||||
Thread.sleep(2000);
|
||||
if (!list.isEmpty()) {
|
||||
Cell cell2 = row.getCell(2);
|
||||
if (cell2 == null) cell2 = row.createCell(2);
|
||||
cell2.setCellValue(list.getFirst());
|
||||
}
|
||||
if (list.size() > 1) {
|
||||
List<Integer> result = extractTownAndVillage(list.get(1));
|
||||
int zhenCount = result.get(0);
|
||||
int xiangCount = result.get(1);
|
||||
//如果zhenCount==0,那么我就用list.get(1)中去查找 镇 这个字的数量,然后用这个数量作为镇的数量
|
||||
if (zhenCount == 0 && xiangCount == 0) {
|
||||
zhenCount = countChineseCharacter(list.get(1), "镇");
|
||||
xiangCount = countChineseCharacter(list.get(1), "乡");
|
||||
}
|
||||
System.out.println("镇数量=" + zhenCount + ",乡数量=" + xiangCount);
|
||||
if (zhenCount == 0 && xiangCount == 0) {
|
||||
System.out.println("没有找到镇或乡的数量,请手动输入!");
|
||||
System.out.println(list.get(1));
|
||||
}
|
||||
Cell cell3 = row.getCell(3);
|
||||
if (cell3 == null) cell3 = row.createCell(3);
|
||||
cell3.setCellValue(zhenCount);
|
||||
|
||||
Cell cell4 = row.getCell(4);
|
||||
if (cell4 == null) cell4 = row.createCell(4);
|
||||
cell4.setCellValue(xiangCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
outputStream = new FileOutputStream(sampleExcelPath);
|
||||
workbook.write(outputStream); // 将修改后的工作簿写入文件
|
||||
inputStream.close();
|
||||
outputStream.close();
|
||||
System.out.println("县区所有文件处理完成!");
|
||||
}
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue