from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By

# Location of the chromedriver binary used when the caller does not supply one.
CHROMEDRIVER_PATH = r"C:\Windows\System32\chromedriver.exe"


def init_wechat_browser(driver_path: str = CHROMEDRIVER_PATH) -> webdriver.Chrome:
    """Create the Chrome WebDriver instance used by the WeChat scraper.

    :param driver_path: path to the chromedriver executable; defaults to
        ``CHROMEDRIVER_PATH``.
    :return: a new ``webdriver.Chrome`` started with the ``-headless``
        argument. The caller is responsible for calling ``quit()`` on it.
    """
    options = Options()
    options.add_argument('-headless')
    service = ChromeService(executable_path=driver_path)
    return webdriver.Chrome(service=service, options=options)


def _text_after_first_blank_line(text: str) -> str:
    """Return the part of *text* that follows its first blank line.

    A line counts as blank when it is empty or whitespace-only. Every line
    after it is kept and terminated with a newline. When *text* contains no
    blank line it is returned unchanged.
    """
    lines = text.split('\n')
    for index, line in enumerate(lines):
        if not line.strip():
            return "".join(f"{kept}\n" for kept in lines[index + 1:])
    return text


def get_article_content(url: str) -> str:
    """Fetch the text of a WeChat official-account article.

    Opens *url* in a fresh browser from ``init_wechat_browser``, reads the
    text of the first element with class ``rich_media``, and drops
    everything up to and including the first blank line.

    :param url: article URL
    :return: the article text
    """
    driver = init_wechat_browser()
    try:
        driver.get(url)
        page_text = driver.find_element(By.CLASS_NAME, "rich_media").text
        body = _text_after_first_blank_line(page_text)
        # Single replacement pass: each pair of consecutive newlines becomes
        # one, so runs of three or more are shortened, not fully collapsed.
        return body.replace("\n\n", "\n")
    finally:
        # Always shut the browser down, even when loading or lookup fails.
        driver.quit()