diff --git a/dsLightRag/Test/TestCrawl.py b/dsLightRag/Test/TestCrawl.py index b9d6ff78..8fcde632 100644 --- a/dsLightRag/Test/TestCrawl.py +++ b/dsLightRag/Test/TestCrawl.py @@ -1,6 +1,10 @@ # 详解(一)Python + Selenium 批量采集微信公众号,搭建自己的微信公众号每日AI简报,告别信息焦虑 # https://blog.csdn.net/k352733625/article/details/149222945 import logging +import re + +import requests + # 1、安装Firefox软件【最新】 # https://www.firefox.com.cn/download/#product-desktop-release @@ -21,18 +25,16 @@ chrome://version/ https://googlechromelabs.github.io/chrome-for-testing/ https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip """ -import time, random, re, json, requests +import time from selenium import webdriver -from selenium.webdriver import Chrome -from selenium.webdriver.firefox.options import Options -from selenium.webdriver.common.by import By +from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.service import Service as ChromeService -import requests import json -import datetime -def weChat_login(): + + +if __name__ == '__main__': # 定义一个空的字典,存放cookies内容 post = {} # 用webdriver启动谷歌浏览器 @@ -58,10 +60,13 @@ def weChat_login(): if "slave_sid" not in post: logging.info("登录公众号失败,获取cookie失败") - return None - cookie_str = json.dumps(post) - return cookie_str + exit() + cookies = json.dumps(post) + print(cookies) -if __name__ == '__main__': - cookie_str = weChat_login() - print(cookie_str) \ No newline at end of file + print(driver.current_url) + + #url = 'https://mp.weixin.qq.com' + #response = requests.get(url=url, allow_redirects=False, cookies=cookies) + #token = re.findall(r'token=(\d+)', str(response.headers.get("Location")))[0] + #logging.info("微信token:" + token)