diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py index 0338bae8..6892c371 100644 --- a/dsLightRag/WxGzh/T2_CollectArticle.py +++ b/dsLightRag/WxGzh/T2_CollectArticle.py @@ -161,6 +161,7 @@ if __name__ == '__main__': # 爬取文章 for item in gzlist: + cnt = 0 account_name = item["account_name"] account_id = item["account_id"] id = item["id"] @@ -205,6 +206,7 @@ if __name__ == '__main__': query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data) fakeid_list = query_fakeid_response.json().get('app_msg_list') + for item in fakeid_list: article_url = item.get('link') article_title = item.get('title') @@ -225,10 +227,13 @@ if __name__ == '__main__': pool = loop.run_until_complete(init_postgres_pool()) loop.run_until_complete( save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, id)) + cnt = cnt + 1 finally: loop.run_until_complete(pool.close()) loop.close() - + # 休息1秒,防止频繁访问被封 time.sleep(1) + logger.info(f"成功获取公众号: {account_name} {cnt}篇文章。") # 关闭浏览器 driver.quit() +