|
|
@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
# 详解(一)Python + Selenium 批量采集微信公众号,搭建自己的微信公众号每日AI简报,告别信息焦虑
|
|
|
|
|
|
|
|
# https://blog.csdn.net/k352733625/article/details/149222945
|
|
|
|
|
|
|
|
import logging
|
|
|
|
|
|
|
|
# 1、安装Firefox软件【最新】
|
|
|
|
|
|
|
|
# https://www.firefox.com.cn/download/#product-desktop-release
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 2、下载geckodriver驱动【最新】
|
|
|
|
|
|
|
|
# https://splinter-docs-zh-cn.readthedocs.io/zh/latest/drivers/firefox.html
|
|
|
|
|
|
|
|
# https://github.com/mozilla/geckodriver/releases
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
# 查看selenium版本
|
|
|
|
|
|
|
|
pip show selenium
|
|
|
|
|
|
|
|
4.34.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 查看Chrome浏览器版本
|
|
|
|
|
|
|
|
chrome://version/
|
|
|
|
|
|
|
|
138.0.7204.101 (正式版本) (64 位)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 下载驱动包
|
|
|
|
|
|
|
|
https://googlechromelabs.github.io/chrome-for-testing/
|
|
|
|
|
|
|
|
https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
import time, random, re, json, requests
|
|
|
|
|
|
|
|
from selenium import webdriver
|
|
|
|
|
|
|
|
from selenium.webdriver import Chrome
|
|
|
|
|
|
|
|
from selenium.webdriver.firefox.options import Options
|
|
|
|
|
|
|
|
from selenium.webdriver.common.by import By
|
|
|
|
|
|
|
|
from selenium.webdriver.chrome.service import Service as ChromeService
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
|
|
|
import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def weChat_login(driver_path=r"C:\Windows\System32\chromedriver.exe", scan_wait=20):
    """Log in to the WeChat Official Accounts site and return its cookies.

    Opens https://mp.weixin.qq.com/ in a Chrome window, waits for the
    operator to scan the login QR code with a phone, reloads the page and
    collects the cookies held by the browser session.

    Args:
        driver_path: Path of the chromedriver executable handed to Selenium.
            Defaults to the Windows location this script was written for.
        scan_wait: Seconds to wait for the QR code to be scanned before the
            page is reloaded and the cookies are read.

    Returns:
        A JSON string mapping cookie name to cookie value, or ``None`` when
        the ``slave_sid`` cookie is missing after the wait (the code treats
        that as a failed login).
    """
    logging.info("启动浏览器,打开微信公众号登录界面")

    # The browser is started with a visible window: the operator has to see
    # the QR code to scan it, so no headless option is passed to the driver.
    service = ChromeService(executable_path=driver_path)
    driver = webdriver.Chrome(service=service)
    try:
        # Open the login page and give it a moment to render the QR code.
        driver.get('https://mp.weixin.qq.com/')
        time.sleep(2)

        logging.info("请拿手机扫码二维码登录公众号")
        time.sleep(scan_wait)

        # Reload the login page; after a successful scan the session cookies
        # are available from the browser.
        driver.get('https://mp.weixin.qq.com/')

        # get_cookies() returns a list of cookie dicts; flatten to name -> value.
        post = {item['name']: item['value'] for item in driver.get_cookies()}
    finally:
        # Always shut the browser down, otherwise every call leaves a Chrome
        # and chromedriver process behind, including on errors.
        driver.quit()

    if "slave_sid" not in post:
        logging.info("登录公众号失败,获取cookie失败")
        return None

    return json.dumps(post)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # weChat_login() reports its progress, including the prompt to scan the
    # QR code, through logging.info. The root logger defaults to WARNING, so
    # without this call none of those messages would be shown.
    logging.basicConfig(level=logging.INFO)

    cookie_str = weChat_login()
    print(cookie_str)
|