You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
52 lines
1.7 KiB
52 lines
1.7 KiB
import json
|
|
|
|
from selenium import webdriver # 用来驱动浏览器的
|
|
from selenium.webdriver.support.wait import WebDriverWait # 等待页面加载某些元素
|
|
|
|
from Util import MysqlUtil
|
|
|
|
|
|
# 下载图片
|
|
def downImg(code, schoolName):
    """Scrape one college's image gallery page and record each image in MySQL.

    Opens the image-list page for the given college, and for every cover-image
    element found reads the preview image's ``src`` and the text of the first
    ``<p>`` under the cover element's parent, then inserts one row per image
    into ``t_university_image``.

    Relies on two module-level globals bound by the ``__main__`` section:
    ``driver`` (the Selenium WebDriver) and ``db`` (the MysqlUtil connection).

    Args:
        code: College code; sent as the ``collegeCode`` query parameter and
            stored in the ``code`` column.
        schoolName: College name; sent as the ``name`` query parameter.
    """
    # Imported locally so the function brings its own dependencies into scope.
    from urllib.parse import quote, urlencode

    from selenium.webdriver.common.by import By

    # Percent-encode the parameters so names containing '&', '#', spaces or
    # non-ASCII characters cannot corrupt the query string. urlencode also
    # stringifies a non-string code instead of raising TypeError.
    query = urlencode({'collegeCode': code, 'name': schoolName}, quote_via=quote)
    driver.get('https://pv4y-pc-tob.youzy.cn/colleges/image-list?' + query)

    sql = 'insert into t_university_image(code,url,img,img_name) values(%s,%s,%s,%s)'

    # find_element(By, ...) replaces the find_element_by_* helpers, which were
    # removed in Selenium 4.3; the locator form works on older releases too.
    covers = driver.find_elements(By.CSS_SELECTOR, "[class='el-image cover-image']")
    for cover in covers:
        imgSrc = cover.find_element(
            By.CSS_SELECTOR, "[class='el-image__inner el-image__preview']"
        ).get_attribute("src")
        if not imgSrc:
            # get_attribute returns None when the attribute is absent; there
            # is no URL to record, so skip this image rather than crash.
            continue

        # The image name is the text of the first <p> under the cover's parent.
        caption = cover.find_element(By.XPATH, "..").find_element(By.TAG_NAME, "p")

        # Columns: college code, full image URL, file name (last path
        # segment of the URL), display name.
        db.execute(sql, (code, imgSrc, imgSrc.split('/')[-1], caption.text))

    # Commit once per college page.
    db.commit()
|
|
|
|
|
|
if __name__ == '__main__':
    # downImg() reads `driver` and `db` as module-level globals, so both are
    # bound here at module scope rather than inside a helper function.
    driver = webdriver.Chrome()  # Launch Chrome
    try:
        # Element lookups poll for up to 10 seconds before failing.
        driver.implicitly_wait(10)
        # Maximize the browser window
        driver.maximize_window()

        # NOTE(review): presumably opened first to establish a session or
        # cookies before the image pages are requested; confirm.
        authPage = 'https://www.youzy.cn/tzy/search/colleges/collegeList'
        driver.get(authPage)

        # Initialize the database connection from the JSON config file.
        with open("Config.json", 'r', encoding='utf-8') as load_f:
            connect = json.load(load_f)
        db = MysqlUtil.MySQLConnect(connect)
        try:
            sql = 'select code,cnName from t_university_info'
            schools = db.fetchall(sql)
            for num, x in enumerate(schools, start=1):
                # Announce the school before scraping it, so the last line
                # printed identifies the school in progress if a page fails.
                print("正在下载第" + str(num) + "个学校的图片。")
                downImg(x['code'], x['cnName'])
        finally:
            # Close the database connection even if scraping failed.
            db.close()
    finally:
        # Shut down Selenium even on error; quit() closes every window and
        # ends the driver session.
        driver.quit()
|