main
HuangHai 1 week ago
parent 24de098979
commit af62427756

@ -27,6 +27,8 @@ async def get_wechat_sources():
return [dict(row) for row in rows]
finally:
await pool.close()
"""
# 查看selenium版本
pip show selenium
@ -100,7 +102,7 @@ if __name__ == '__main__':
# List for article URLs (presumably collected later; its use is not visible here).
article_urls = []
# Initialize the browser
driver = init_wechat_browser()
# Fetch the list of WeChat official accounts: create a fresh event loop and
# install it as the current loop so the async query get_wechat_sources() can
# be driven to completion from this synchronous script.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
@ -108,7 +110,7 @@ if __name__ == '__main__':
gzlist = loop.run_until_complete(get_wechat_sources())
finally:
loop.close()
# 爬取文章
for item in gzlist:
account_name = item["account_name"]
@ -159,12 +161,14 @@ if __name__ == '__main__':
# Pull the fields needed for one article entry out of `item`.
article_url = item.get('link')
article_title = item.get('title')
# Convert the epoch-seconds "update_time" into a 'YYYY-MM-DD HH:MM:SS' string.
# fromtimestamp() without a tz argument yields local time.
# NOTE(review): int(None) raises TypeError if "update_time" is missing — confirm it is always present.
publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S')
# Fetch and display the article content directly
# NOTE(review): `in` raises TypeError if 'title' is missing (article_title is None) — confirm.
if '试卷' in article_title:  # skip exam papers
continue
print(f"正在处理文章: {article_title} ({publish_time})")
content = get_article_content(article_url)
# Show only the first 200 characters as a preview.
# NOTE(review): slicing fails if get_article_content() returns None — verify its return contract.
print(f"文章内容预览: {content[:200]}...")
# Pause one second between articles (presumably to throttle requests).
time.sleep(1)
# Close the browser
# NOTE(review): not reached if anything above raises; consider try/finally around the crawl.
driver.quit()

Loading…
Cancel
Save