You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

97 lines
4.3 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import requests
from bs4 import BeautifulSoup
# Print the forecast fields of one city from weather.com.cn's regional
# text-forecast pages.
#
# For every region code in `area` the script downloads
# https://www.weather.com.cn/textFC/<code>.shtml, walks each table row inside
# the `div.conMidtab2` elements, and prints the fields of the first row whose
# city link text equals `city_name`.

# Region codes of the pages to fetch; each one is substituted into the URL.
area = ["db"]
# Only the row whose city link text equals this name is reported.
city_name = '长春'

# The request headers do not depend on the page, so build them once.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0"
}
# Upper bound, in seconds, on how long a request may stall. Without an
# explicit timeout, requests.get() can wait on an unresponsive server forever.
request_timeout = 10

for page in area:
    # Build the URL of the regional forecast page.
    url = f"https://www.weather.com.cn/textFC/{page}.shtml"
    # Fetch the page.
    res = requests.get(url=url, headers=headers, timeout=request_timeout)
    # Fail loudly on an HTTP error instead of parsing an error page and
    # silently printing nothing.
    res.raise_for_status()
    res.encoding = 'utf-8'  # decode the body as UTF-8
    # Parse the HTML with BeautifulSoup (lxml parser).
    soup = BeautifulSoup(res.text, 'lxml')
    # City names already reported for this page, so that only the first
    # matching row is printed.
    # NOTE(review): presumably the page repeats each city in several tables
    # (e.g. one per forecast day) -- confirm against the live markup.
    processed_cities = set()
    # Visit every table row inside every div with class "conMidtab2".
    for div in soup.select('div.conMidtab2'):
        for tr in div.select('tr'):
            # The width-83 cell holds the city link; look it up once and skip
            # rows that have no such cell or no <a> inside it.
            city_td = tr.find('td', width='83')
            if city_td is None or city_td.a is None:
                continue
            city = city_td.a.string
            # Skip every city except the requested one, and skip the requested
            # city once it has been reported.
            if city != city_name or city in processed_cities:
                continue
            processed_cities.add(city)
            print(f"城市:{city}")

            # Morning weather description (width-89 cell).
            morning_weather_td = tr.find('td', width='89')
            if morning_weather_td is not None:
                print(f"上午天气:{morning_weather_td.string}")

            # Morning wind (width-162 cell): the first two <span> elements
            # are printed in order, separated by a space.
            morning_wind_td = tr.find('td', width='162')
            if morning_wind_td is not None:
                spans = morning_wind_td.find_all('span')
                if len(spans) >= 2:
                    print(f"上午风力风向:{spans[0].string} {spans[1].string}")

            # Morning maximum temperature (width-92 cell).
            morning_max_temp_td = tr.find('td', width='92')
            if morning_max_temp_td is not None:
                print(f"上午最高温度:{morning_max_temp_td.string}摄氏度")

            # Night weather description (width-98 cell).
            night_weather_td = tr.find('td', width='98')
            if night_weather_td is not None:
                print(f"晚上天气:{night_weather_td.string}")

            # Night wind (width-177 cell): same two-span layout as the
            # morning wind cell.
            night_wind_td = tr.find('td', width='177')
            if night_wind_td is not None:
                spans = night_wind_td.find_all('span')
                if len(spans) >= 2:
                    print(f"晚上风力风向:{spans[0].string} {spans[1].string}")

            # Night minimum temperature (width-86 cell).
            night_min_temp_td = tr.find('td', width='86')
            if night_min_temp_td is not None:
                print(f"晚上最低温度:{night_min_temp_td.string}摄氏度")