|
|
import json
|
|
|
import time
|
|
|
|
|
|
from selenium import webdriver # 用来驱动浏览器的
|
|
|
from selenium.webdriver.common.by import By
|
|
|
from selenium.webdriver.support.wait import WebDriverWait # 等待页面加载某些元素
|
|
|
from selenium.webdriver.firefox.options import Options
|
|
|
|
|
|
# 配置信息
|
|
|
from Util import MysqlUtil
|
|
|
|
|
|
# Colleges listing page. Both scraper functions open it with driver.get()
# immediately before the detail page they actually read.
# NOTE(review): presumably this establishes a session/auth state - confirm.
authPage = 'https://www.youzy.cn/tzy/search/colleges/collegeList'
|
|
|
|
|
|
'''
|
|
|
功能:获取简介
|
|
|
'''
|
|
|
|
|
|
|
|
|
def getJianJie(code):
    """Scrape a college's introduction text and save it to t_university_info.

    Uses the module-level ``driver`` (Selenium WebDriver) and ``db`` (MySQL
    connection helper) that the script entry point creates.

    Args:
        code: College code. It is appended to the introduction URL and used
            as the key of the row whose ``Introduction`` column is updated.
    """
    # str() keeps the URL build working even if the database hands the code
    # back as a number instead of a string.
    url = 'https://www.youzy.cn/colleges/introduction?collegeCode=' + str(code)

    # The listing page is always opened right before the detail page.
    driver.get(authPage)
    driver.get(url)

    # The element is looked up inside this iframe, so switch into it first.
    driver.switch_to.frame("youzy_part_view")
    time.sleep(1)

    # find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
    # which newer Selenium 4 releases no longer provide; it also matches the
    # find_elements(By.TAG_NAME, ...) call style already used in this file.
    txt = driver.find_element(
        By.CSS_SELECTOR, "[class='f16 fcolor333 paragraph']").text

    # Store the introduction on the college's row.
    sql = "update t_university_info set Introduction= %s where code=%s"
    db.execute(sql, (txt, code))
    db.commit()
|
|
|
|
|
|
|
|
|
'''
|
|
|
获取院系设置
|
|
|
'''
|
|
|
|
|
|
|
|
|
def getYuanXiSheZhi(code):
    """Scrape a college's department table and insert it into t_university_yuanxi.

    Uses the module-level ``driver`` (Selenium WebDriver) and ``db`` (MySQL
    connection helper) that the script entry point creates. One row is
    inserted per ``<tr>`` of the last ``el-table__body`` table on the page:
    the first text line of the row goes to ``xueyuan`` and the second (or an
    empty string when absent) to ``zhuanye``.

    Args:
        code: College code. It is appended to the detail URL and stored in
            the ``code`` column of every inserted row.
    """
    # Local import: only this function needs the exception type.
    from selenium.common.exceptions import WebDriverException

    # str() keeps the URL build working even if the database hands the code
    # back as a number instead of a string.
    url = 'https://www.youzy.cn/colleges/detail?collegeCode=' + str(code)

    # The listing page is always opened right before the detail page.
    driver.get(authPage)
    driver.get(url)

    # Elements are looked up inside this iframe, so switch into it first.
    driver.switch_to.frame("youzy_part_view")
    time.sleep(1)

    # Click the expand arrow to load the full list. This is best effort: a
    # page without the arrow (or one that cannot be clicked) is still scraped.
    # Only Selenium errors are ignored, so Ctrl+C and real bugs still surface.
    try:
        driver.find_element(
            By.CSS_SELECTOR, "[class='el-icon-arrow-down ml5']").click()
    except WebDriverException:
        pass

    # College name
    print(driver.find_element(By.CSS_SELECTOR, "[class='f28 fw400 mr20']").text)

    # Department table: the last matching table on the page is the one used.
    tables = driver.find_elements(By.CSS_SELECTOR, "[class='el-table__body']")
    if not tables:
        # Nothing to store; indexing tables[-1] would raise IndexError and
        # abort the whole run for a single college without a table.
        print(code, 'no department table found')
        return
    rows = tables[-1].find_elements(By.TAG_NAME, "tr")

    # The statement is the same for every row, so build it once.
    sql = "insert into t_university_yuanxi(code,xueyuan,zhuanye) values(%s,%s,%s)"
    for tr in rows:
        cells = tr.text.split("\n")
        xueyuan = cells[0]
        zhuanye = cells[1] if len(cells) > 1 else ''
        print(code, xueyuan, zhuanye)
        db.execute(sql, (code, xueyuan, zhuanye))

    # Commit once after all rows of this college have been inserted.
    db.commit()
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: start headless Chrome, connect to MySQL using
    # Config.json, then scrape the introduction and the department table for
    # every college code stored in t_university_info.

    # Build Chrome's own options object. The Options class imported at the
    # top of the file comes from selenium.webdriver.firefox and does not
    # belong with webdriver.Chrome.
    option = webdriver.ChromeOptions()
    option.add_argument('--headless')
    option.add_argument('--disable-gpu')

    driver = webdriver.Chrome(options=option)
    # Element lookups wait up to 10 seconds before failing. (The original
    # also built a WebDriverWait and discarded it, which had no effect.)
    driver.implicitly_wait(10)

    # Defined before the try so the cleanup below can tell whether the
    # connection was ever opened.
    db = None
    try:
        with open("Config.json", 'r', encoding='utf-8') as load_f:
            connect = json.load(load_f)
        db = MysqlUtil.MySQLConnect(connect)

        sql = "select code from t_university_info order by id"
        l1 = db.fetchall(sql)
        total = len(l1)
        for index, row in enumerate(l1, start=1):
            print("序号:" + str(index) + ",总数:" + str(total))
            code = row["code"]
            getJianJie(code)
            getYuanXiSheZhi(code)
            print('===========================================================================')
    finally:
        # quit() closes every window and ends the driver session, so a
        # separate close() is not needed. The nested finally guarantees the
        # database is closed even if shutting the browser down fails.
        try:
            driver.quit()
        finally:
            if db is not None:
                db.close()
|