|
|
|
@ -40,6 +40,7 @@ if not logger.handlers:
|
|
|
|
|
handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
|
|
|
|
|
logger.addHandler(handler)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def get_wechat_sources():
|
|
|
|
|
"""从t_wechat_source表获取微信公众号列表"""
|
|
|
|
|
try:
|
|
|
|
@ -51,13 +52,14 @@ async def get_wechat_sources():
|
|
|
|
|
await pool.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def is_article_exist(pool, article_url):
|
|
|
|
|
"""检查文章URL是否已存在数据库中"""
|
|
|
|
|
try:
|
|
|
|
|
async with pool.acquire() as conn:
|
|
|
|
|
row = await conn.fetchrow('''
|
|
|
|
|
SELECT 1 FROM t_wechat_articles WHERE url = $1 LIMIT 1
|
|
|
|
|
SELECT 1
|
|
|
|
|
FROM t_wechat_articles
|
|
|
|
|
WHERE url = $1 LIMIT 1
|
|
|
|
|
''', article_url)
|
|
|
|
|
return row is not None
|
|
|
|
|
except Exception as e:
|
|
|
|
@ -125,11 +127,11 @@ if __name__ == '__main__':
|
|
|
|
|
response = requests.get(url=url, allow_redirects=False, cookies=cookies)
|
|
|
|
|
if 'Location' in response.headers:
|
|
|
|
|
redirect_url = response.headers.get("Location")
|
|
|
|
|
logger.info(f"重定向URL:%s"%redirect_url)
|
|
|
|
|
logger.info(f"重定向URL:%s" % redirect_url)
|
|
|
|
|
token_match = re.findall(r'token=(\d+)', redirect_url)
|
|
|
|
|
if token_match:
|
|
|
|
|
token = token_match[0]
|
|
|
|
|
logger.info(f"获取到的token:%s"%token)
|
|
|
|
|
logger.info(f"获取到的token:%s" % token)
|
|
|
|
|
|
|
|
|
|
article_urls = []
|
|
|
|
|
|
|
|
|
@ -203,8 +205,7 @@ if __name__ == '__main__':
|
|
|
|
|
try:
|
|
|
|
|
pool = loop.run_until_complete(init_postgres_pool())
|
|
|
|
|
loop.run_until_complete(
|
|
|
|
|
save_article_to_db(pool, article_title, account_name, article_url, publish_time, content,
|
|
|
|
|
id))
|
|
|
|
|
save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, id))
|
|
|
|
|
finally:
|
|
|
|
|
loop.run_until_complete(pool.close())
|
|
|
|
|
loop.close()
|
|
|
|
|