From c5207bd3093174a5007aee8a67b609437d64db0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=84=E6=B5=B7?= <10402852@qq.com> Date: Mon, 28 Oct 2024 08:29:25 +0800 Subject: [PATCH] 'commit' --- BaiHu/Tools/YunNan.py | 48 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 BaiHu/Tools/YunNan.py diff --git a/BaiHu/Tools/YunNan.py b/BaiHu/Tools/YunNan.py new file mode 100644 index 00000000..de365397 --- /dev/null +++ b/BaiHu/Tools/YunNan.py @@ -0,0 +1,48 @@ +# pip install pymysql +# pip install requests beautifulsoup4 + +import pymysql +import requests +from bs4 import BeautifulSoup + +if __name__ == '__main__': + # 遍历 mysql数据库,然后开启爬虫 + # 建立数据库连接 + conn = pymysql.connect( + host='10.10.14.203', # 主机名(或IP地址) + port=3306, # 端口号,默认为3306 + user='root', # 用户名 + password='Password123@mysql', # 密码 + charset='utf8mb4' # 设置字符编码 + ) + + # 创建游标对象 + cursor = conn.cursor() + # 选择数据库 + conn.select_db("ds_db") + # 执行查询操作 + cursor.execute( + "SELECT id,full_name FROM t_dm_area where province_id='FD61813E-70A1-42AB-9A8E-141ED4D47B98' order by level_id") + + # 获取查询结果,返回元组 + result: tuple = cursor.fetchall() + + for e in result: + area_name = e[1] + url = "https://baike.baidu.com/item/" + area_name + "?fromModule=lemma_search-box" + + # 发送HTTP GET请求 + response = requests.get(url) + # 检查请求是否成功 + if response.status_code == 200: + # 使用BeautifulSoup解析HTML内容 + soup = BeautifulSoup(response.text, 'html.parser') + # 假设我们要抓取的是