@ -1,9 +1,11 @@
package com.dsideal.base.Test ;
import cn.hutool.core.io.FileUtil ;
import com.alibaba.dashscope.exception.InputRequiredException ;
import com.alibaba.dashscope.exception.NoApiKeyException ;
import com.dsideal.base.DataEase.Model.DataEaseModel ;
import com.dsideal.base.Tools.Util.LocalMysqlConnectUtil ;
import com.jfinal.plugin.activerecord.Record ;
import org.jsoup.Jsoup ;
import org.jsoup.nodes.Document ;
import org.jsoup.nodes.Element ;
@ -14,12 +16,9 @@ import java.io.IOException;
import java.io.InputStreamReader ;
import java.net.HttpURLConnection ;
import java.net.URL ;
import java.sql.SQLOutput ;
import java.util.List ;
import com.jfinal.plugin.activerecord.Record ;
public class TestTongYi {
public class TestReadHtml {
public static String getHTML ( String url ) throws IOException {
StringBuilder stringBuilder = new StringBuilder ( ) ;
@ -42,28 +41,27 @@ public class TestTongYi {
* @return
* @throws IOException
* /
public static String getXq ( String areaName ) throws IOException , InterruptedException {
public static String getXq ( String areaName ) throws IOException {
String res = "" ;
String url = "https://baike.baidu.com/item/" + areaName + "?fromModule=lemma_search-box" ;
String htmlContent = getHTML ( url ) ;
// 从字符串解析HTML
Document doc = Jsoup . parse ( htmlContent ) ;
// 选择所有span标签
Elements spans = doc . select ( "span" ) ;
Elements dts = doc . select ( "dt" ) ;
// 遍历所有span标签
for ( Element span : spans ) {
// 检查span的文本是否包含"km²"
if ( span . text ( ) . contains ( "km²" ) ) {
// 输出符合条件的span内容
res = span . text ( ) . replace ( "km²" , "" ) . trim ( ) ;
for ( Element dt : dts ) {
if ( dt . text ( ) . equals ( "下辖地区" ) ) {
System . out . println ( dt . nextElementSibling ( ) . text ( ) ) ;
}
if ( span . text ( ) . contains ( "个镇" ) & & span . text ( ) . contains ( "个乡" ) & & span . text ( ) . contains ( "、" ) & & span . text ( ) . length ( ) < = 12 ) {
res = res + "," + span . text ( ) ;
if ( dt . text ( ) . replace ( " " , "" ) . equals ( "面积" ) ) {
System . out . println ( dt . nextElementSibling ( ) . text ( ) ) ;
}
}
return res ;
}
@ -73,10 +71,11 @@ public class TestTongYi {
DataEaseModel dm = new DataEaseModel ( ) ;
List < Record > list = dm . getProvinceArea ( "云南省" ) ;
for ( Record record : list ) {
String areaName = record . getStr ( "area_name" ) ;
String areaName = record . getStr ( "full_name" ) ;
String cityName = record . getStr ( "city_name" ) ;
String res = getXq ( areaName ) ;
System . out . println ( areaName + "\t" + res ) ;
Thread . sleep ( 3 000) ;
System . out . println ( cityName + "\t" + areaName + "\t" + res ) ;
Thread . sleep ( 1 000) ;
}
}
}