You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

101 lines
3.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import json
import time
from selenium import webdriver # 用来驱动浏览器的
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait # 等待页面加载某些元素
from selenium.webdriver.firefox.options import Options
# 配置信息
from Util import MysqlUtil
authPage = 'https://www.youzy.cn/tzy/search/colleges/collegeList'
def getJianJie(code):
    """Fetch a college's introduction text and store it in MySQL.

    Opens the introduction page for ``code``, reads the paragraph inside the
    ``youzy_part_view`` iframe and writes it to the ``Introduction`` column of
    the ``t_university_info`` row whose ``code`` matches.

    Uses the module-level ``driver`` and ``db`` objects created by the
    ``__main__`` section, plus the module constant ``authPage``.

    Args:
        code: College code. It is converted with ``str()`` for the URL and
            passed unchanged as the SQL parameter.
    """
    url = 'https://www.youzy.cn/colleges/introduction?collegeCode=' + str(code)
    # The list page is loaded before every detail page; presumably the site
    # needs it visited first for the detail page to render -- TODO confirm.
    driver.get(authPage)
    driver.get(url)
    # The page content is rendered inside an iframe.
    driver.switch_to.frame("youzy_part_view")
    time.sleep(1)
    # find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
    # which no longer exists in Selenium >= 4.3; this form also works on 3.x.
    txt = driver.find_element(By.CSS_SELECTOR, "[class='f16 fcolor333 paragraph']").text
    # Update the existing record.
    sql = "update t_university_info set Introduction= %s where code=%s"
    db.execute(sql, (txt, code))
    db.commit()
def getYuanXiSheZhi(code):
    """Fetch a college's department table and insert its rows into MySQL.

    Opens the detail page for ``code``, tries to expand the collapsed table,
    prints the college name, then inserts one ``t_university_yuanxi`` row per
    ``<tr>`` of the last ``el-table__body`` table on the page. The first line
    of a row's text goes to ``xueyuan`` and the second (or ``''`` when there is
    none) to ``zhuanye``. All inserts are committed together at the end.

    Uses the module-level ``driver`` and ``db`` objects created by the
    ``__main__`` section, plus the module constant ``authPage``.

    Args:
        code: College code. It is converted with ``str()`` for the URL and
            passed unchanged as the SQL parameter.
    """
    # Local import: selenium is already a dependency of this file, and this
    # keeps the change self-contained.
    from selenium.common.exceptions import WebDriverException

    url = 'https://www.youzy.cn/colleges/detail?collegeCode=' + str(code)
    # The list page is loaded before every detail page; presumably the site
    # needs it visited first for the detail page to render -- TODO confirm.
    driver.get(authPage)
    driver.get(url)
    # The page content is rendered inside an iframe.
    driver.switch_to.frame("youzy_part_view")
    time.sleep(1)

    # Expand the "show more" arrow when it exists. This stays best-effort, but
    # only Selenium errors are ignored: a bare ``except`` would also hide
    # KeyboardInterrupt and programming errors.
    try:
        driver.find_element(By.CSS_SELECTOR, "[class='el-icon-arrow-down ml5']").click()
    except WebDriverException:
        pass

    # College name.
    print(driver.find_element(By.CSS_SELECTOR, "[class='f28 fw400 mr20']").text)

    # Department table: the last matching table on the page is used.
    menu_table = driver.find_elements(By.CSS_SELECTOR, "[class='el-table__body']")
    if not menu_table:
        # No table on this page; skip this college instead of raising IndexError.
        print(code, 'no el-table__body found, skipped')
        return
    table_tr_list = menu_table[-1].find_elements(By.TAG_NAME, "tr")

    sql = "insert into t_university_yuanxi(code,xueyuan,zhuanye) values(%s,%s,%s)"
    for tr in table_tr_list:
        cells = tr.text.split("\n")
        # A row may carry only one line of text; the second column is then empty.
        zhuanye = cells[1] if len(cells) > 1 else ''
        print(code, cells[0], zhuanye)
        db.execute(sql, (code, cells[0], zhuanye))
    # Commit the transaction once all rows are queued.
    db.commit()
if __name__ == '__main__':
    # Script entry point: start a headless Chrome, connect to MySQL using the
    # settings in Config.json, then scrape the introduction and department
    # table for every code in t_university_info (ordered by id).
    #
    # ``driver`` and ``db`` stay module-level names because getJianJie() and
    # getYuanXiSheZhi() read them as globals.

    # Chrome must be configured with Chrome options; the Firefox ``Options``
    # class imported at the top of the file does not describe a Chrome session.
    option = webdriver.ChromeOptions()
    option.add_argument('--headless')
    option.add_argument('--disable-gpu')
    driver = webdriver.Chrome(options=option)
    driver.implicitly_wait(10)

    # Bound before the try so the cleanup below can tell whether the
    # connection was ever opened.
    db = None
    try:
        with open("Config.json", 'r') as load_f:
            connect = json.load(load_f)
        db = MysqlUtil.MySQLConnect(connect)

        sql = "select code from t_university_info order by id"
        l1 = db.fetchall(sql)
        total = len(l1)
        for index, row in enumerate(l1, start=1):
            print("序号:" + str(index) + ",总数:" + str(total))
            code = row["code"]
            getJianJie(code)
            getYuanXiSheZhi(code)
            print('===========================================================================')
    finally:
        try:
            # quit() closes every window and ends the driver session, so a
            # separate close() beforehand is unnecessary.
            driver.quit()
        finally:
            # Close the database even if shutting the browser down failed.
            if db is not None:
                db.close()