|
|
|
@ -132,19 +132,20 @@ if __name__ == '__main__':
|
|
|
|
|
# Open the article-list page of the searched WeChat official account.
# An explicit timeout keeps a stalled connection from blocking the crawl
# forever; requests applies no timeout unless one is given.
query_fakeid_response = requests.get(
    appmsg_url,
    cookies=cookies,
    headers=header,
    params=query_id_data,
    timeout=30,
)
# `.get` returns None when the payload has no 'app_msg_list' key (presumably
# an error / rate-limit response — TODO confirm); normalise to an empty list
# so downstream code always receives a list.
fakeid_list = query_fakeid_response.json().get('app_msg_list') or []
|
|
|
|
|
# Collect only the first entry of the article list.  Guard against an empty
# or missing list so indexing [0] cannot raise.
if fakeid_list:
    item = fakeid_list[0]
    # Example of a collected item
    new_article = {
        'title': item.get('title'),
        'article_url': item.get('link'),
        'account_id': account_id,
        'account_name': account_name,
        # 'update_time' is parsed as a POSIX timestamp and rendered in local time.
        'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S'),
        'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    }
    # logging formats lazily as `msg % args`; without a %s placeholder the
    # dict was silently dropped and only "new_article:" reached the log.
    logging.info("new_article: %s", new_article)
    article_urls.append(item.get('link'))
else:
    logging.warning("app_msg_list is empty; no article collected")
|
|
|
|
|
time.sleep(2)
|
|
|
|
|
|
|
|
|
|
# Collect every article in the list.  `or []` makes a missing list (None)
# behave like an empty one instead of raising TypeError.
for item in fakeid_list or []:
    # Example of a collected item
    new_article = {
        'title': item.get('title'),
        'article_url': item.get('link'),
        'account_id': account_id,
        'account_name': account_name,
        # 'update_time' is parsed as a POSIX timestamp and rendered in local time.
        'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S'),
        'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    }
    # logging formats lazily as `msg % args`; without a %s placeholder the
    # dict was silently dropped and only "new_article:" reached the log.
    logging.info("new_article: %s", new_article)
    article_urls.append(item.get('link'))
|
|
|
|
|
time.sleep(1)
|
|
|
|
|
|
|
|
|
|
for article_url in article_urls:
|
|
|
|
|
print("正在爬取文章:" + article_url)
|
|
|
|
|