From 9ed09f13d6cd12662def6bf3a3c4ff3af0a6c9df Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Mon, 14 Jul 2025 15:19:13 +0800
Subject: [PATCH] 'commit'

---
Reviewer notes (this section is not part of the commit message):
 * What the patch does: iterate over every entry of 'app_msg_list' instead of
   reading only fakeid_list[0], collecting each article link.
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch. Verify the context lines, and the nesting of the added
   lines (in particular whether time.sleep(1) belongs inside the loop),
   against TestCrawl.py before applying.
 * Two added lines were amended during review: the logging call now has a %s
   placeholder for its argument, and the loop iterates over
   `fakeid_list or []`. The post-image hash on the `index` line is therefore
   stale.

 dsLightRag/Test/TestCrawl.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/dsLightRag/Test/TestCrawl.py b/dsLightRag/Test/TestCrawl.py
index 95e70278..7a9a4e6d 100644
--- a/dsLightRag/Test/TestCrawl.py
+++ b/dsLightRag/Test/TestCrawl.py
@@ -132,19 +132,20 @@ if __name__ == '__main__':
         # 打开搜索的微信公众号文章列表页
         query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data)
         fakeid_list = query_fakeid_response.json().get('app_msg_list')
-        item = fakeid_list[0]
-        # 采集item示例
-        new_article = {
-            'title': item.get('title'),
-            'article_url': item.get('link'),
-            'account_id': account_id,
-            'account_name': account_name,
-            'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S'),
-            'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-        }
-        logging.info("new_article:", new_article)
-        article_urls.append(item.get('link'))
-        time.sleep(2)
+
+        for item in fakeid_list or []:  # .get() yields None when 'app_msg_list' is absent
+            # Build the record describing this article (used for logging only)
+            new_article = {
+                'title': item.get('title'),
+                'article_url': item.get('link'),
+                'account_id': account_id,
+                'account_name': account_name,
+                'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S'),
+                'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+            }
+            logging.info("new_article: %s", new_article)
+            article_urls.append(item.get('link'))
+            time.sleep(1)
 
     for article_url in article_urls:
         print("正在爬取文章:" + article_url)