import json
import time

from selenium import webdriver  # drives the browser
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait  # explicit waits for page elements
# Retained so existing references keep working; the Chrome session below is
# configured with webdriver.ChromeOptions instead.
from selenium.webdriver.firefox.options import Options  # browser configuration

from Util import MysqlUtil

# Page opened before every detail page.
# NOTE(review): presumably this establishes the session/cookies the detail
# pages need -- confirm against the site's behaviour.
authPage = 'https://www.youzy.cn/tzy/search/colleges/collegeList'


def getJianJie(code):
    """Scrape the introduction text of one college and save it to MySQL.

    Uses the module-level ``driver`` and ``db`` objects that are created in
    the ``__main__`` section of this script.

    Args:
        code: College code. It is concatenated into the URL, so it must be
            a ``str``.
    """
    url = 'https://www.youzy.cn/colleges/introduction?collegeCode=' + code
    driver.get(authPage)
    driver.get(url)
    # The page content lives inside an iframe.
    driver.switch_to.frame("youzy_part_view")
    time.sleep(1)
    # find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
    # which was removed in Selenium 4.3.
    txt = driver.find_element(
        By.CSS_SELECTOR, "[class='f16 fcolor333 paragraph']").text
    # Update the existing university record.
    sql = "update t_university_info set Introduction= %s where code=%s"
    db.execute(sql, (txt, code))
    db.commit()


def getYuanXiSheZhi(code):
    """Scrape the department/major table of one college and save it to MySQL.

    Inserts one ``t_university_yuanxi`` row per table row found, then commits
    once. Uses the module-level ``driver`` and ``db`` objects that are created
    in the ``__main__`` section of this script.

    Args:
        code: College code. It is concatenated into the URL, so it must be
            a ``str``.
    """
    url = 'https://www.youzy.cn/colleges/detail?collegeCode=' + code
    driver.get(authPage)
    driver.get(url)
    # The page content lives inside an iframe.
    driver.switch_to.frame("youzy_part_view")
    time.sleep(1)
    # Best effort: expand the "show more" arrow if it is present and clickable.
    try:
        driver.find_element(
            By.CSS_SELECTOR, "[class='el-icon-arrow-down ml5']").click()
    except WebDriverException:
        # Nothing to expand (or the arrow is not clickable); carry on.
        pass
    # School name.
    print(driver.find_element(By.CSS_SELECTOR, "[class='f28 fw400 mr20']").text)
    # Department settings: the last table body on the page is the one used.
    menu_table = driver.find_elements(By.CSS_SELECTOR, "[class='el-table__body']")
    if not menu_table:
        # No table rendered for this college; skip it instead of aborting
        # the whole crawl with an IndexError.
        print(code, 'no department table found, skipped')
        return
    table_tr_list = menu_table[-1].find_elements(By.TAG_NAME, "tr")
    sql = "insert into t_university_yuanxi(code,xueyuan,zhuanye) values(%s,%s,%s)"
    for tr in table_tr_list:
        # First text line is stored as xueyuan, the second (when present)
        # as zhuanye.
        arr1 = tr.text.split("\n")
        zhuanye = ''
        if len(arr1) > 1:
            zhuanye = arr1[1]
        print(code, arr1[0], zhuanye)
        db.execute(sql, (code, arr1[0], zhuanye))
    # Commit the transaction once all rows are queued.
    db.commit()


if __name__ == '__main__':
    # Initialise selenium: run Chrome without a visible window.
    # The options object must be Chrome's own; Firefox options do not carry
    # these arguments over to a Chrome session.
    option = webdriver.ChromeOptions()
    option.add_argument('--headless')
    option.add_argument('--disable-gpu')
    driver = webdriver.Chrome(options=option)
    driver.implicitly_wait(10)

    # Bound up front so the cleanup below is safe even when the config file
    # or the database connection fails.
    db = None
    try:
        with open("Config.json", 'r') as load_f:
            connect = json.load(load_f)
        db = MysqlUtil.MySQLConnect(connect)

        # Process every college known to the database, in id order.
        sql = "select code from t_university_info order by id"
        l1 = db.fetchall(sql)
        total = len(l1)
        for i, row in enumerate(l1):
            print("序号:" + str(i + 1) + ",总数:" + str(total))
            code = row["code"]
            getJianJie(code)
            getYuanXiSheZhi(code)
            print('===========================================================================')
    finally:
        try:
            # quit() closes every window and ends the WebDriver session.
            driver.quit()
        finally:
            # Close the database connection if it was opened.
            if db is not None:
                db.close()