diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py index d39b3539..23b89728 100644 --- a/dsLightRag/WxGzh/T2_CollectArticle.py +++ b/dsLightRag/WxGzh/T2_CollectArticle.py @@ -40,6 +40,7 @@ if not logger.handlers: handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) logger.addHandler(handler) + async def get_wechat_sources(): """从t_wechat_source表获取微信公众号列表""" try: @@ -51,13 +52,14 @@ async def get_wechat_sources(): await pool.close() - async def is_article_exist(pool, article_url): """检查文章URL是否已存在数据库中""" try: async with pool.acquire() as conn: row = await conn.fetchrow(''' - SELECT 1 FROM t_wechat_articles WHERE url = $1 LIMIT 1 + SELECT 1 + FROM t_wechat_articles + WHERE url = $1 LIMIT 1 ''', article_url) return row is not None except Exception as e: @@ -125,11 +127,11 @@ if __name__ == '__main__': response = requests.get(url=url, allow_redirects=False, cookies=cookies) if 'Location' in response.headers: redirect_url = response.headers.get("Location") - logger.info(f"重定向URL:%s"%redirect_url) + logger.info(f"重定向URL:%s" % redirect_url) token_match = re.findall(r'token=(\d+)', redirect_url) if token_match: token = token_match[0] - logger.info(f"获取到的token:%s"%token) + logger.info(f"获取到的token:%s" % token) article_urls = [] @@ -203,8 +205,7 @@ if __name__ == '__main__': try: pool = loop.run_until_complete(init_postgres_pool()) loop.run_until_complete( - save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, - id)) + save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, id)) finally: loop.run_until_complete(pool.close()) loop.close()