import time

from selenium import webdriver  # drives the browser
from selenium.webdriver.common.by import By  # locator strategies for find_element(s)
from selenium.webdriver.support.wait import WebDriverWait  # explicit waits on page conditions

# Scrape the college list at `url`: scroll the list container to trigger
# lazy loading, then for every college card print its name, image URL, tags,
# description and rank, open its detail page and print that page's URL.

# Name of the iframe that hosts the list and id of the scrollable container
# inside it (the id is spelled this way on the page).
LIST_FRAME = "youzy_part_view"
SCROLL_CONTAINER_ID = "scorllELe"

# Scrolling stops once more than MAX_SCROLL_ROUNDS rounds have run, or once
# the same scroll position has been observed more than
# MAX_SAME_POSITION_HITS times. With these values the round cap is always
# reached first; raise it if the repeat check should become the deciding one.
MAX_SCROLL_ROUNDS = 30
MAX_SAME_POSITION_HITS = 1000

driver = webdriver.Chrome()
# driver.maximize_window()  # maximize the browser window

url = 'https://www.youzy.cn/tzy/search/colleges/collegeList'

try:
    # Implicit wait: element lookups retry for up to 10 seconds.
    driver.implicitly_wait(10)
    wait = WebDriverWait(driver, 10)
    driver.get(url)

    # The list lives inside an iframe.
    driver.switch_to.frame(LIST_FRAME)

    # Keep jumping to the bottom of the list so that more entries get loaded.
    # Positions are counted in a dict rather than a pre-sized list: the
    # reported scrollTop is not guaranteed to be a small non-negative integer,
    # and a list index would raise on a fractional or out-of-range value.
    position_hits = {}
    rounds = 0
    while True:
        scroller = driver.find_element(By.ID, SCROLL_CONTAINER_ID)
        driver.execute_script("arguments[0].scrollTop=1000000", scroller)
        time.sleep(0.2)

        # Read back where the container actually ended up.
        position = driver.execute_script("return arguments[0].scrollTop;", scroller)
        position_hits[position] = position_hits.get(position, 0) + 1
        if position_hits[position] > MAX_SAME_POSITION_HITS:
            break

        rounds += 1
        if rounds > MAX_SCROLL_ROUNDS:
            break

    time.sleep(1)

    # Handle of the list window, used to tell the detail window apart and to
    # return here after each detail page.
    handle = driver.current_window_handle

    # Every college card in the list.
    # NOTE: find_element_by_* / find_elements_by_* were removed in
    # Selenium 4.3; find_element(By...) works on old and new versions alike.
    cards = driver.find_elements(By.CSS_SELECTOR, "[class='college-list mb30']")
    for card in cards:
        # Name; clicking it is expected to open the detail page in a new
        # window (the window handling below relies on that).
        name_link = card.find_element(By.CSS_SELECTOR, "[class='f20']")
        print(name_link.text)
        driver.execute_script("arguments[0].click();", name_link)

        # Image
        image = card.find_element(By.CSS_SELECTOR, "[class='el-image__inner']")
        print(image.get_attribute("src"))

        # Tags (985 / 211 / double first-class / national key, ...)
        for tag in card.find_elements(
                By.CSS_SELECTOR, "[class='el-tag el-tag--info el-tag--plain']"):
            print(tag.text)

        # Region, category, governing body
        description = card.find_element(By.CSS_SELECTOR, "[class='college-desc']")
        print(description.text)

        # Rank, with the label and line breaks stripped
        rank = card.find_element(By.CSS_SELECTOR, "[class='heat f12']")
        print(rank.text.replace("排名", '').replace("\n", ''))

        # Wait until the detail window really exists instead of indexing
        # window_handles immediately, which races with the browser.
        wait.until(lambda drv: len(drv.window_handles) > 1)
        detail_handle = next(h for h in driver.window_handles if h != handle)
        driver.switch_to.window(detail_handle)
        print(driver.current_url)
        driver.close()

        # Back to the list window; the frame has to be re-entered after a
        # window switch.
        driver.switch_to.window(handle)
        driver.switch_to.frame(LIST_FRAME)
        time.sleep(1)

        print("=================================================")
finally:
    driver.quit()