# pip install pymysql
# pip install requests beautifulsoup4
import pymysql
import requests
from bs4 import BeautifulSoup
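
# Overview (added comment, describing what the code below does): the script reads
# the area names of one province from the t_dm_area table in the ds_db MySQL
# database and, for each area, fetches its Baidu Baike entry page and prints the
# text of the page's <h1> title.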
if __name__ == '__main__':
    # Iterate over the areas stored in MySQL, then crawl Baidu Baike for each one
    # Open the database connection
    conn = pymysql.connect(
        host='10.10.14.203',           # host name or IP address
        port=3306,                     # port number, defaults to 3306
        user='root',                   # user name
        password='Password123@mysql',  # password
        charset='utf8mb4'              # character encoding
    )
    # Create a cursor object
    cursor = conn.cursor()
    # Select the database
    conn.select_db("ds_db")
    # Run the query
    cursor.execute(
        "SELECT id,full_name FROM t_dm_area where province_id='FD61813E-70A1-42AB-9A8E-141ED4D47B98' order by level_id")
    # Fetch all rows; the default cursor returns a tuple of tuples
    result: tuple = cursor.fetchall()
    for e in result:
        area_name = e[1]
        url = "https://baike.baidu.com/item/" + area_name + "?fromModule=lemma_search-box"
        # Send the HTTP GET request; Baidu Baike may reject requests that lack a
        # browser-like User-Agent, so a minimal one is supplied here
        headers = {'User-Agent': 'Mozilla/5.0'}
        response = requests.get(url, headers=headers, timeout=10)
        # Check whether the request succeeded
        if response.status_code == 200:
            # Parse the HTML content with BeautifulSoup
            soup = BeautifulSoup(response.text, 'html.parser')
            # Grab the text inside the <h1> tag; adjust the selector to
            # extract different content as needed
            h1 = soup.find('h1')
            if h1 is not None:
                print(h1.text)  # print the scraped text
            else:
                print('No <h1> tag found on the page')
        else:
            print('Failed to retrieve the webpage')
    # Close the cursor and the connection
    cursor.close()
    conn.close()