From ed4aa1bd69a80592b284258656a03bfb4b0b5946 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Tue, 15 Jul 2025 11:33:27 +0800 Subject: [PATCH] 'commit' --- dsLightRag/WxGzh/T2_GetArticleList.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py index 0bb8944c..207f5bbd 100644 --- a/dsLightRag/WxGzh/T2_GetArticleList.py +++ b/dsLightRag/WxGzh/T2_GetArticleList.py @@ -23,7 +23,7 @@ async def get_wechat_sources(): try: pool = await init_postgres_pool() async with pool.acquire() as conn: - rows = await conn.fetch('SELECT account_id, account_name FROM t_wechat_source') + rows = await conn.fetch('SELECT * FROM t_wechat_source') return [dict(row) for row in rows] finally: await pool.close() @@ -43,24 +43,24 @@ https://googlechromelabs.github.io/chrome-for-testing/ https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip """ import time -from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.service import Service as ChromeService -async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, account_id): + +async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, id): try: async with pool.acquire() as conn: - # 确保account_id是整数 - account_id_int = int(account_id) if account_id else 0 + # 更安全的account_id转换逻辑 await conn.execute(''' - INSERT INTO t_wechat_articles - (title, source, url, publish_time, content, source_id) - VALUES ($1, $2, $3, $4, $5, $6) - ''', article_title, account_name, article_url, - publish_time, content, account_id_int) # 修改为整数类型 + INSERT INTO t_wechat_articles + (title, source, url, publish_time, content, source_id) + VALUES ($1, $2, $3, $4, $5, $6) + ''', article_title, account_name, article_url, + publish_time, content, id) except Exception as e: logging.error(f"保存文章失败: {e}") + if __name__ == '__main__': # 从文件cookies.txt中获取 with open('cookies.txt', 'r', encoding='utf-8') as f: @@ -97,7 +97,6 @@ if __name__ == '__main__': } service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe") - driver = webdriver.Chrome(service=service, options=options) # 删除这行 # 使用统一的初始化方式 driver = init_wechat_browser() @@ -114,8 +113,6 @@ if __name__ == '__main__': logging.info("微信token:" + token) article_urls = [] - # 初始化浏览器 - driver = init_wechat_browser() # 获取公众号列表 loop = asyncio.new_event_loop() @@ -129,6 +126,7 @@ if __name__ == '__main__': for item in gzlist: account_name = item["account_name"] account_id = item["account_id"] + id = item["id"] # 搜索微信公众号的接口地址 search_url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?' # 搜索微信公众号接口需要传入的参数,有三个变量:微信公众号token、随机数random、搜索的微信公众号名字 @@ -187,7 +185,7 @@ if __name__ == '__main__': pool = loop.run_until_complete(init_postgres_pool()) loop.run_until_complete( save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, - account_id)) + id)) finally: loop.run_until_complete(pool.close()) loop.close()