"""Log in to the WeChat Official Accounts platform and save the session cookies.

The script opens https://mp.weixin.qq.com/ in Chrome through Selenium, waits
for the operator to scan the login QR code with a phone, reloads the page and
writes the browser cookies (plus the latest cookie expiry timestamp) to
``cookies.txt`` as JSON.

Background reading:
    * Deep dive (part 1): batch-collecting WeChat official accounts with
      Python + Selenium to build your own daily AI briefing
      https://blog.csdn.net/k352733625/article/details/149222945
    * WeChat crawler: analysis of official-account article scraping code
      https://blog.csdn.net/yajuanpi4899/article/details/121584268

Environment notes:
    * Check the selenium version:  ``pip show selenium``  (written against 4.34.2)
    * Check the Chrome version:    ``chrome://version/``
      (written against 138.0.7204.101, official build, 64-bit)
    * Download a matching driver:
      https://googlechromelabs.github.io/chrome-for-testing/
      https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
"""

import json
import logging
import sys
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service as ChromeService
# NOTE(review): `expires` is never used in this script; this looks like an
# accidental IDE auto-import. Kept so the module's import surface is
# unchanged -- confirm and remove.
from torch.distributed.elastic.timer import expires

LOGIN_URL = 'https://mp.weixin.qq.com/'
CHROMEDRIVER_PATH = r"C:\Windows\System32\chromedriver.exe"
COOKIE_FILE = 'cookies.txt'
# Cookie whose presence this script treats as proof of a successful login.
LOGIN_COOKIE_NAME = "slave_sid"
# Seconds to wait after first opening the login page.
PAGE_LOAD_WAIT_SECONDS = 2
# Seconds the operator has to scan the QR code with a phone.
QR_SCAN_WAIT_SECONDS = 20

# Request headers carrying a WeChat built-in browser User-Agent.
# NOTE(review): not used anywhere in this script; presumably intended for
# follow-up HTTP requests made with the saved cookies -- confirm.
HEADER = {
    "HOST": "mp.weixin.qq.com",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 "
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI "
        "MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4",
    "Connection": "keep-alive",
}


def _latest_expiry(cookie_items):
    """Return the largest ``expiry`` value among *cookie_items*.

    Args:
        cookie_items: Cookie dicts as returned by ``driver.get_cookies()``.
            Entries without an ``expiry`` key are ignored.

    Returns:
        The maximum ``expiry`` found, or ``-1`` when no cookie carries one.
    """
    expiries = (item['expiry'] for item in cookie_items if 'expiry' in item)
    return max([-1, *expiries])


def _fetch_login_cookies():
    """Drive Chrome through the QR-code login and return the raw cookie list.

    The browser is always shut down before returning, including when
    navigation fails, so no Chrome/chromedriver process is left behind.

    Returns:
        The list of cookie dicts reported by ``driver.get_cookies()``.
    """
    logging.info("启动浏览器,打开微信公众号登录界面")
    options = Options()
    service = ChromeService(executable_path=CHROMEDRIVER_PATH)
    driver = webdriver.Chrome(service=service, options=options)
    try:
        # Open the Official Accounts login page and wait 2 seconds.
        driver.get(LOGIN_URL)
        time.sleep(PAGE_LOAD_WAIT_SECONDS)

        # The operator must scan the QR code with a phone during this wait.
        logging.info("请拿手机扫码二维码登录公众号")
        time.sleep(QR_SCAN_WAIT_SECONDS)

        # Reload the login page; the cookies are read from this response.
        driver.get(LOGIN_URL)
        return driver.get_cookies()
    finally:
        driver.quit()


def main():
    """Run the login flow and persist the cookies to ``cookies.txt``.

    Exits with status 1 when the ``slave_sid`` cookie is missing, which this
    script treats as a failed login.
    """
    # Without explicit configuration the root logger drops INFO records, so
    # the operator would never see the "scan the QR code" prompt.
    logging.basicConfig(level=logging.INFO)

    cookie_items = _fetch_login_cookies()

    # get_cookies() yields a list of dicts; flatten it to name -> value.
    cookies = {item['name']: item['value'] for item in cookie_items}

    if LOGIN_COOKIE_NAME not in cookies:
        logging.error("登录公众号失败,获取cookie失败")
        sys.exit(1)

    # Stored next to the cookie values under the extra key "expiry".
    cookies["expiry"] = _latest_expiry(cookie_items)

    with open(COOKIE_FILE, mode='w', encoding="utf-8") as f:
        f.write(json.dumps(cookies, indent=4, ensure_ascii=False))

    print("成功获取了cookies内容!")


if __name__ == '__main__':
    main()