'commit'

1 week ago · af62427756
parent 24de098979
commit af62427756
1 changed file with 8 additions and 4 deletions
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -27,6 +27,8 @@ async def get_wechat_sources():
            return [dict(row) for row in rows]
    finally:
        await pool.close()
+
+
 """
 # 查看selenium版本
 pip show selenium
@@ -100,7 +102,7 @@ if __name__ == '__main__':
    article_urls = []
    # 初始化浏览器
    driver = init_wechat_browser()
-    
+
    # 获取公众号列表
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
@@ -108,7 +110,7 @@ if __name__ == '__main__':
        gzlist = loop.run_until_complete(get_wechat_sources())
    finally:
        loop.close()
-    
+
    # 爬取文章
    for item in gzlist:
        account_name = item["account_name"]
@@ -159,12 +161,14 @@ if __name__ == '__main__':
            article_url = item.get('link')
            article_title = item.get('title')
            publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S')
-            
+
            # 直接获取并显示文章内容
+            if '试卷' in article_title: # 过滤掉试卷
+                continue
            print(f"正在处理文章: {article_title} ({publish_time})")
            content = get_article_content(article_url)
            print(f"文章内容预览: {content[:200]}...")
-            
+
            time.sleep(1)
        # 关闭浏览器
    driver.quit()