From 7101b19cb7fea5bbf75ce13dc95880661ec4ccb3 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Tue, 15 Jul 2025 11:28:01 +0800 Subject: [PATCH] 'commit' --- dsLightRag/WxGzh/T2_GetArticleList.py | 33 +++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py index 65af9f41..0bb8944c 100644 --- a/dsLightRag/WxGzh/T2_GetArticleList.py +++ b/dsLightRag/WxGzh/T2_GetArticleList.py @@ -47,6 +47,20 @@ from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.service import Service as ChromeService +async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, account_id): + try: + async with pool.acquire() as conn: + # 确保account_id是整数 + account_id_int = int(account_id) if account_id else 0 + await conn.execute(''' + INSERT INTO t_wechat_articles + (title, source, url, publish_time, content, source_id) + VALUES ($1, $2, $3, $4, $5, $6) + ''', article_title, account_name, article_url, + publish_time, content, account_id_int) # 修改为整数类型 + except Exception as e: + logging.error(f"保存文章失败: {e}") + if __name__ == '__main__': # 从文件cookies.txt中获取 with open('cookies.txt', 'r', encoding='utf-8') as f: @@ -157,17 +171,26 @@ if __name__ == '__main__': fakeid_list = query_fakeid_response.json().get('app_msg_list') for item in fakeid_list: - # 采集item示例 article_url = item.get('link') article_title = item.get('title') - publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S') + publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time"))) - # 直接获取并显示文章内容 - if '试卷' in article_title: # 过滤掉试卷 + if '试卷' in article_title: # 过滤掉试卷 continue + print(f"正在处理文章: {article_title} ({publish_time})") content = get_article_content(article_url) - print(f"文章内容预览: {content[:200]}...") + + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + pool = loop.run_until_complete(init_postgres_pool()) + loop.run_until_complete( + save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, + account_id)) + finally: + loop.run_until_complete(pool.close()) + loop.close() time.sleep(1) # 关闭浏览器