|
|
# 详解(一)Python + Selenium 批量采集微信公众号,搭建自己的微信公众号每日AI简报,告别信息焦虑
|
|
|
# https://blog.csdn.net/k352733625/article/details/149222945
|
|
|
|
|
|
# 微信爬爬猫---公众号文章抓取代码分析
|
|
|
# https://blog.csdn.net/yajuanpi4899/article/details/121584268
|
|
|
|
|
|
import json
|
|
|
import logging
|
|
|
|
|
|
from torch.distributed.elastic.timer import expires
|
|
|
|
|
|
"""
|
|
|
# 查看selenium版本
|
|
|
pip show selenium
|
|
|
4.34.2
|
|
|
|
|
|
# 查看Chrome浏览器版本
|
|
|
chrome://version/
|
|
|
138.0.7204.101 (正式版本) (64 位)
|
|
|
|
|
|
# 下载驱动包
|
|
|
https://googlechromelabs.github.io/chrome-for-testing/
|
|
|
https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
|
|
|
"""
|
|
|
import time
|
|
|
from selenium import webdriver
|
|
|
from selenium.webdriver.chrome.options import Options
|
|
|
from selenium.webdriver.chrome.service import Service as ChromeService
|
|
|
|
|
|
if __name__ == '__main__':
    # Purpose: open the WeChat Official Account platform in a Selenium-driven
    # Chrome, let the operator log in by scanning the QR code with a phone,
    # then dump the session cookies (plus the latest cookie expiry timestamp)
    # as JSON to ``cookies.txt`` in the current working directory.

    # The root logger defaults to WARNING, so without this call the INFO
    # prompts below (including the "scan the QR code" prompt) are never shown.
    # basicConfig is a no-op if logging has already been configured.
    logging.basicConfig(level=logging.INFO)

    # Request headers imitating the WeChat built-in browser User-Agent.
    # NOTE(review): not used anywhere in the visible part of this script;
    # kept in case code further down the file relies on it.
    header = {
        "HOST": "mp.weixin.qq.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4",
        "Connection": "keep-alive"
    }

    # Start Chrome through an explicitly located chromedriver binary.
    logging.info("启动浏览器,打开微信公众号登录界面")
    options = Options()
    service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
    driver = webdriver.Chrome(service=service, options=options)

    # try/finally guarantees the browser (and the chromedriver process) is
    # shut down even if navigation fails or the login check below aborts.
    try:
        # Open the login page and pause 2 seconds before prompting.
        driver.get('https://mp.weixin.qq.com/')
        time.sleep(2)

        # Fixed 20-second window for the operator to scan the QR code.
        logging.info("请拿手机扫码二维码登录公众号")
        time.sleep(20)

        # Reload the landing page after the scan window and read the
        # browser's cookies from that session.
        driver.get('https://mp.weixin.qq.com/')
        cookie_items = driver.get_cookies()
    finally:
        driver.quit()

    # get_cookies() returns a list of dicts; flatten it to {name: value}.
    cookies = {item['name']: item['value'] for item in cookie_items}

    # Latest expiry among cookies that carry one; -1 when none do.
    expiry = max([-1] + [item['expiry'] for item in cookie_items if 'expiry' in item])

    # The script treats a missing "slave_sid" cookie as a failed login.
    if "slave_sid" not in cookies:
        logging.error("登录公众号失败,获取cookie失败")
        # SystemExit works without the ``site``-provided exit() helper and a
        # non-zero status lets calling scripts detect the failure.
        raise SystemExit(1)

    # Store the expiry alongside the cookie values (this overwrites any
    # cookie that happens to be literally named "expiry").
    cookies["expiry"] = expiry
    with open('cookies.txt', mode='w', encoding="utf-8") as f:
        f.write(json.dumps(cookies, indent=4, ensure_ascii=False))

    print("成功获取了cookies内容!")
|