"""Print details of every college listed on youzy.cn's college search page.

The script opens the list page in Chrome, scrolls the lazily loaded list
container to pull in more entries, then for each college card prints its
name, logo URL, tags, description and ranking.  Clicking the college name
opens a detail page in a new window; its URL is printed before that window
is closed again.
"""
import time
from collections import Counter

from selenium import webdriver  # drives the browser
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait  # explicit waits

url = 'https://www.youzy.cn/tzy/search/colleges/collegeList'

# Name/id of the iframe that hosts the college list.
LIST_FRAME = "youzy_part_view"
# Upper bound on scroll rounds (the loop stops once the round count exceeds it).
MAX_SCROLL_ROUNDS = 30
# Stop once the same scroll offset has been observed more than this many times.
# NOTE: with MAX_SCROLL_ROUNDS at 30 this limit cannot be reached; both limits
# are kept as they were in the original script.
MAX_REPEATS_PER_OFFSET = 1000
# Delay between triggering a scroll and reading the resulting offset.
SCROLL_SETTLE_SECONDS = 0.2
# Pause at the end of every scroll round and after each processed college.
PAUSE_SECONDS = 1


def _load_more_colleges(driver):
    """Repeatedly scroll the list container to its bottom to load more entries.

    Scroll offsets are tallied in a ``Counter`` so that any offset the browser
    reports (large or fractional) can be counted without indexing errors.
    """
    offset_counts = Counter()
    rounds = 0
    while True:
        # "scorllELe" (sic) is the id used by the page itself.
        container = driver.find_element(By.ID, "scorllELe")
        driver.execute_script("arguments[0].scrollTop=1000000", container)
        time.sleep(SCROLL_SETTLE_SECONDS)
        # Read back the offset the browser actually settled on.
        offset = driver.execute_script(
            "return arguments[0].scrollTop;", container
        )
        offset_counts[offset] += 1
        rounds += 1
        # Pause after every round, including the last one, so the most
        # recently requested batch has time to render before it is read.
        time.sleep(PAUSE_SECONDS)
        if offset_counts[offset] > MAX_REPEATS_PER_OFFSET:
            break
        if rounds > MAX_SCROLL_ROUNDS:
            break


def _print_detail_url(driver, wait, main_handle):
    """Print the URL of the newly opened detail window, then close it.

    Waits until a second window handle exists (raising selenium's
    ``TimeoutException`` if none appears), switches to it, prints its URL,
    closes it and returns to ``main_handle`` inside the list iframe.
    """
    wait.until(lambda drv: len(drv.window_handles) > 1)
    detail_handle = next(
        h for h in driver.window_handles if h != main_handle
    )
    driver.switch_to.window(detail_handle)
    print(driver.current_url)
    driver.close()
    driver.switch_to.window(main_handle)
    # Switching windows resets the frame context, so re-enter the iframe.
    driver.switch_to.frame(LIST_FRAME)


driver = webdriver.Chrome()
# driver.maximize_window()  # maximize the browser window

try:
    # Implicit wait: element lookups retry for up to 10 seconds.
    driver.implicitly_wait(10)
    wait = WebDriverWait(driver, 10)

    driver.get(url)
    # The list lives inside an iframe.
    driver.switch_to.frame(LIST_FRAME)

    _load_more_colleges(driver)

    # Handle of the window showing the list, used to return to it later.
    main_handle = driver.current_window_handle

    # Every college card currently present in the list.
    cards = driver.find_elements(
        By.CSS_SELECTOR, "[class='college-list mb30']"
    )
    for card in cards:
        # Name; clicking it opens the college's detail page.
        name_link = card.find_element(By.CSS_SELECTOR, "[class='f20']")
        print(name_link.text)
        driver.execute_script("arguments[0].click();", name_link)

        # Logo image
        logo = card.find_element(By.CSS_SELECTOR, "[class='el-image__inner']")
        print(logo.get_attribute("src"))

        # Tags such as 985 / 211 / Double First-Class / national key
        tags = card.find_elements(
            By.CSS_SELECTOR, "[class='el-tag el-tag--info el-tag--plain']"
        )
        for tag in tags:
            print(tag.text)

        # Region, category, sponsor
        description = card.find_element(
            By.CSS_SELECTOR, "[class='college-desc']"
        )
        print(description.text)

        # Ranking, with the "排名" label and line breaks stripped
        ranking = card.find_element(By.CSS_SELECTOR, "[class='heat f12']")
        print(ranking.text.replace("排名", '').replace("\n", ''))

        _print_detail_url(driver, wait, main_handle)

        time.sleep(PAUSE_SECONDS)
        print("=================================================")
finally:
    driver.quit()