You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

79 lines
3.2 KiB

1 week ago
# 详解Python + Selenium 批量采集微信公众号搭建自己的微信公众号每日AI简报告别信息焦虑
# https://blog.csdn.net/k352733625/article/details/149222945
# 微信爬爬猫---公众号文章抓取代码分析
# https://blog.csdn.net/yajuanpi4899/article/details/121584268
1 week ago
import json
1 week ago
import logging
1 week ago
from torch.distributed.elastic.timer import expires
1 week ago
"""
# 查看selenium版本
pip show selenium
4.34.2
# 查看Chrome浏览器版本
chrome://version/
138.0.7204.101 (正式版本) 64
# 下载驱动包
https://googlechromelabs.github.io/chrome-for-testing/
https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
"""
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service as ChromeService
if __name__ == '__main__':
# 定义一个空的字典存放cookies内容
cookies = {}
# 设置headers - 使用微信内置浏览器的User-Agent
header = {
"HOST": "mp.weixin.qq.com",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4",
"Connection": "keep-alive"
}
# 用webdriver启动谷歌浏览器
logging.info("启动浏览器,打开微信公众号登录界面")
options = Options()
service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
driver = webdriver.Chrome(service=service, options=options)
# 打开微信公众号登录页面
driver.get('https://mp.weixin.qq.com/')
# 等待5秒钟
time.sleep(2)
# # 拿手机扫二维码!
logging.info("请拿手机扫码二维码登录公众号")
time.sleep(20)
# 重新载入公众号登录页登录之后会显示公众号后台首页从这个返回内容中获取cookies信息
driver.get('https://mp.weixin.qq.com/')
# 获取cookies
cookie_items = driver.get_cookies()
1 week ago
expiry=-1
1 week ago
# 获取到的cookies是列表形式将cookies转成json形式并存入本地名为cookie的文本中
for cookie_item in cookie_items:
cookies[cookie_item['name']] = cookie_item['value']
1 week ago
if('expiry' in cookie_item and cookie_item['expiry'] > expiry):
expiry = cookie_item['expiry']
1 week ago
if "slave_sid" not in cookies:
logging.info("登录公众号失败获取cookie失败")
exit()
1 week ago
# 将cookies写入文件
1 week ago
cookies["expiry"] = expiry
1 week ago
with open('cookies.txt', mode='w', encoding="utf-8") as f:
1 week ago
f.write(json.dumps(cookies, indent=4, ensure_ascii=False))
1 week ago
# 关闭浏览器
1 week ago
driver.quit()
1 week ago
# 输出提示
print("成功获取了cookies内容")