"""Fetch a WeChat article with Chrome and print its title and body text.

The page is rendered by a (by default headless) Chrome instance driven through
Selenium; the visible text of the element with class ``rich_media`` is then
split into a title (first line) and a body (everything after the first blank
line, with blank lines removed).
"""

ARTICLE_URL = 'http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd'
CHROMEDRIVER_PATH = r"C:\Windows\System32\chromedriver.exe"


def fetch_article_text(url, driver_path=CHROMEDRIVER_PATH, headless=True):
    """Return the visible text of the ``rich_media`` element found at *url*.

    Args:
        url: Address of the article page to load.
        driver_path: Filesystem path of the chromedriver executable.
        headless: Run Chrome without a window; pass ``False`` when debugging.

    Returns:
        The ``.text`` of the first element whose class is ``rich_media``.

    Raises:
        Whatever Selenium raises when the browser cannot be started, the page
        cannot be loaded, or the element is missing. The browser is shut down
        in every case.
    """
    # Imported here rather than at module level so that the pure text helpers
    # below can be imported (and tested) without Selenium or a browser.
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.service import Service as ChromeService
    from selenium.webdriver.common.by import By

    options = Options()
    if headless:
        options.add_argument('-headless')
    service = ChromeService(executable_path=driver_path)
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get(url)
        # Only the rendered text is needed, not the HTML markup.
        return driver.find_element(By.CLASS_NAME, "rich_media").text
    finally:
        # Always close the browser, even when loading or lookup fails;
        # otherwise the Chrome/chromedriver processes are left running.
        driver.quit()


def extract_title(text):
    """Return the first line of *text*, which is taken to be the title."""
    return text.split('\n')[0]


def extract_body(text):
    """Return the part of *text* after its first blank line, without blank lines.

    A line counts as blank when it is empty or whitespace-only. If *text*
    contains no blank line at all, the whole text is used as the body. The
    result has no trailing newline.
    """
    lines = text.split('\n')
    for index, line in enumerate(lines):
        if not line.strip():
            # Everything up to and including the first blank line is header
            # material (title, author, ...) and is discarded.
            lines = lines[index + 1:]
            break
    # Filtering (instead of a single replace("\n\n", "\n") pass) removes blank
    # lines regardless of how many occur in a row.
    return "\n".join(line for line in lines if line.strip())


def main():
    """Download the article and print its title followed by its body."""
    text = fetch_article_text(ARTICLE_URL)
    print(extract_title(text))
    print(extract_body(text))


if __name__ == "__main__":
    main()