You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

82 lines
2.5 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import time

from selenium import webdriver  # drives the browser
from selenium.webdriver.common.by import By  # locator strategies for find_element(s)
from selenium.webdriver.support.wait import WebDriverWait  # waits for a condition on the page
# Script: open the youzy.cn college list in Chrome, repeatedly scroll the list
# container to its bottom, then print details for every college entry found
# and the URL of the window that opens when the entry's name is clicked.
driver = webdriver.Chrome()
# driver.maximize_window()  # maximize the browser window
url = 'https://www.youzy.cn/tzy/search/colleges/collegeList'

# Number of scroll-to-bottom rounds performed before reading the list.
MAX_SCROLL_ROUNDS = 31
# Stop scrolling early once the same scrollTop value has been read more than
# this many times. With MAX_SCROLL_ROUNDS = 31 this limit cannot be reached;
# it only becomes effective if the round cap is raised above it.
STALL_LIMIT = 1000
# How many times each scrollTop value has been observed. A dict is used so
# that fractional or very large scrollTop values are handled without error.
scroll_top_hits = {}

try:
    # Implicit wait: element lookups retry for up to 10 seconds.
    driver.implicitly_wait(10)
    wait = WebDriverWait(driver, 10)
    driver.get(url)
    # The list is looked up inside this iframe.
    driver.switch_to.frame("youzy_part_view")

    # Keep scrolling the list container to the bottom (the author's note says
    # this is meant to load more entries until none are left).
    for _ in range(MAX_SCROLL_ROUNDS):
        # "scorllELe" (sic) is the element id used by the script; keep as is.
        element = driver.find_element(By.ID, "scorllELe")
        driver.execute_script("arguments[0].scrollTop=1000000", element)
        time.sleep(0.2)
        # Read back the position the browser actually settled on.
        top = driver.execute_script("return arguments[0].scrollTop;", element)
        scroll_top_hits[top] = scroll_top_hits.get(top, 0) + 1
        if scroll_top_hits[top] > STALL_LIMIT:
            break

    time.sleep(1)
    # Remember the list window so we can return to it after each detail page.
    handle = driver.current_window_handle
    # Find every college entry in the list.
    colleges = driver.find_elements(By.CSS_SELECTOR, "[class='college-list mb30']")
    for one in colleges:
        # Name; clicking it via JavaScript is expected to open a second window.
        name_link = one.find_element(By.CSS_SELECTOR, "[class='f20']")
        print(name_link.text)
        driver.execute_script("arguments[0].click();", name_link)

        # Image URL.
        img = one.find_element(By.CSS_SELECTOR, "[class='el-image__inner']")
        print(img.get_attribute("src"))

        # Tags (per the author's note: 985 / 211 / double first-class / key).
        tags = one.find_elements(
            By.CSS_SELECTOR, "[class='el-tag el-tag--info el-tag--plain']"
        )
        for tag in tags:
            print(tag.text)

        # Description (per the author's note: region, category, sponsor).
        desc = one.find_element(By.CSS_SELECTOR, "[class='college-desc']")
        print(desc.text)

        # Ranking text with the "排名" ("rank") label and newlines stripped.
        heat = one.find_element(By.CSS_SELECTOR, "[class='heat f12']")
        print(heat.text.replace("排名", '').replace("\n", ''))

        # Wait for the second window instead of indexing window_handles
        # immediately, which raises IndexError if it has not opened yet.
        wait.until(lambda drv: len(drv.window_handles) > 1)
        detail_handle = next(h for h in driver.window_handles if h != handle)
        driver.switch_to.window(detail_handle)
        print(driver.current_url)
        driver.close()

        # Return to the list window and re-enter the iframe for the next entry.
        driver.switch_to.window(handle)
        driver.switch_to.frame("youzy_part_view")
        time.sleep(1)
        print("=================================================")
finally:
    # Always shut the browser down, even if scraping failed part-way.
    driver.quit()