|
|
|
@ -0,0 +1,48 @@
|
|
|
|
|
# pip install pymysql
|
|
|
|
|
# pip install requests beautifulsoup4
|
|
|
|
|
|
|
|
|
|
import pymysql
|
|
|
|
|
import requests
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Walk every area row stored in MySQL, then scrape each area's
    # Baidu Baike page and print its <h1> heading.

    # Establish the database connection.
    # NOTE(review): credentials are hard-coded — move to env vars / config
    # before this leaves a dev box.
    conn = pymysql.connect(
        host='10.10.14.203',           # host name (or IP address)
        port=3306,                     # port, MySQL default is 3306
        user='root',                   # user name
        password='Password123@mysql',  # password
        database='ds_db',              # select the schema here instead of a
                                       # separate conn.select_db() call
        charset='utf8mb4'              # full UTF-8 so Chinese names round-trip
    )

    try:
        # Cursor as a context manager: closed even if the query raises.
        with conn.cursor() as cursor:
            # All areas of one province, ordered by administrative level.
            cursor.execute(
                "SELECT id,full_name FROM t_dm_area where province_id='FD61813E-70A1-42AB-9A8E-141ED4D47B98' order by level_id")

            # fetchall() returns a tuple of rows; row[1] is full_name.
            result: tuple = cursor.fetchall()

        for e in result:
            area_name = e[1]
            url = "https://baike.baidu.com/item/" + area_name + "?fromModule=lemma_search-box"

            # Send the HTTP GET request. A timeout is essential here: without
            # one, a single stalled host hangs the whole loop indefinitely.
            response = requests.get(url, timeout=10)

            # Check whether the request succeeded.
            if response.status_code == 200:
                # Parse the HTML with BeautifulSoup.
                soup = BeautifulSoup(response.text, 'html.parser')

                # We want the text inside the <h1> tag; adjust the selector
                # to scrape different content. Guard against pages with no
                # <h1> — the original crashed with AttributeError on
                # find() returning None (common on bot-block/redirect pages).
                h1 = soup.find('h1')
                if h1 is not None:
                    print(h1.text)  # print the scraped text
                else:
                    print('No <h1> element found for ' + area_name)
            else:
                print('Failed to retrieve the webpage')
    finally:
        # Always release the connection, even when scraping raises mid-loop
        # (the original leaked the connection on any exception).
        conn.close()
|