|
|
import requests
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
# Region codes; each one is substituted into the text-forecast page URL below.
area = ["db"]

# Only the table row whose city link text equals this name is reported.
city_name = '长春'

# Request headers sent with every page fetch. They never change, so they are
# built once instead of on every loop iteration.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0"
}

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def _cell_text(tag):
    """Return the text inside *tag* with surrounding whitespace removed.

    ``get_text`` is used rather than ``.string`` because ``.string`` is
    ``None`` whenever the tag has more than one child node, which would be
    printed as the literal text "None".
    """
    return tag.get_text(strip=True)


def _find_city_row(soup, name):
    """Return the first table row for city *name*, or ``None`` if absent.

    Rows are searched inside every ``div.conMidtab2`` in document order. A row
    is a candidate only when it has a ``td`` of width 83 that contains an
    ``<a>`` link; the link text is compared with *name*. Returning on the
    first hit gives the same "report each city once" result as the previous
    processed-cities set, since only one city name was ever accepted.
    """
    for div in soup.select('div.conMidtab2'):
        for tr in div.select('tr'):
            city_td = tr.find('td', width='83')
            # Rows without the 83-wide cell, or whose cell carries no link,
            # hold no city name and are skipped.
            if city_td is None or city_td.a is None:
                continue
            if _cell_text(city_td.a) == name:
                return tr
    return None


def _print_wind(wind_td):
    """Return "<first span> <second span>" for a wind cell, or ``None``.

    ``None`` is returned when the cell is missing or has fewer than two
    ``<span>`` children, in which case the caller prints nothing.
    """
    if wind_td is None:
        return None
    spans = wind_td.find_all('span')
    if len(spans) < 2:
        return None
    return f"{_cell_text(spans[0])} {_cell_text(spans[1])}"


def _print_forecast(city, tr):
    """Print the forecast fields found in row *tr* for *city*.

    Each field is located by its ``td`` width attribute and is printed only
    when the corresponding cell exists in the row.
    """
    print(f"城市:{city}")

    # Morning weather (td width 89).
    morning_weather_td = tr.find('td', width='89')
    if morning_weather_td is not None:
        print(f"上午天气:{_cell_text(morning_weather_td)}")

    # Morning wind (td width 162, two spans).
    morning_wind = _print_wind(tr.find('td', width='162'))
    if morning_wind is not None:
        print(f"上午风力风向:{morning_wind}")

    # Morning maximum temperature (td width 92).
    morning_max_temp_td = tr.find('td', width='92')
    if morning_max_temp_td is not None:
        print(f"上午最高温度:{_cell_text(morning_max_temp_td)}摄氏度")

    # Night weather (td width 98).
    night_weather_td = tr.find('td', width='98')
    if night_weather_td is not None:
        print(f"晚上天气:{_cell_text(night_weather_td)}")

    # Night wind (td width 177, two spans).
    night_wind = _print_wind(tr.find('td', width='177'))
    if night_wind is not None:
        print(f"晚上风力风向:{night_wind}")

    # Night minimum temperature (td width 86).
    night_min_temp_td = tr.find('td', width='86')
    if night_min_temp_td is not None:
        print(f"晚上最低温度:{_cell_text(night_min_temp_td)}摄氏度")


for page in area:
    # Build the page URL for this region code.
    url = f"https://www.weather.com.cn/textFC/{page}.shtml"

    # Fetch the page; fail loudly on HTTP errors instead of parsing an
    # error page as if it were forecast data.
    res = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    res.encoding = 'utf-8'  # decode the body as UTF-8

    # Parse the HTML and report the first row that matches city_name.
    soup = BeautifulSoup(res.text, 'lxml')
    city_row = _find_city_row(soup, city_name)
    if city_row is not None:
        _print_forecast(city_name, city_row)
|