|
|
|
@ -47,6 +47,20 @@ from selenium import webdriver
|
|
|
|
|
from selenium.webdriver.chrome.options import Options
|
|
|
|
|
from selenium.webdriver.chrome.service import Service as ChromeService
|
|
|
|
|
|
|
|
|
|
async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, account_id):
    """Insert one article row into the ``t_wechat_articles`` table.

    This is a best-effort write: any exception raised while converting the
    id, acquiring a connection or executing the statement is logged (with
    its traceback) and swallowed, so the coroutine always returns ``None``.

    Args:
        pool: Object whose ``acquire()`` returns an async context manager
            yielding a connection with an awaitable ``execute(sql, *args)``.
            NOTE(review): the ``$1``-style placeholders suggest an asyncpg
            pool -- confirm against ``init_postgres_pool``.
        article_title: Value stored in the ``title`` column.
        account_name: Value stored in the ``source`` column.
        article_url: Value stored in the ``url`` column.
        publish_time: Value stored in the ``publish_time`` column.
        content: Value stored in the ``content`` column.
        account_id: Value for the ``source_id`` column; coerced with
            ``int()``. Falsy values (``None``, ``""``, ``0``) are stored as 0.
    """
    try:
        # Make sure the id is an integer. This is done before borrowing a
        # connection so an invalid id never occupies a pool slot; it stays
        # inside the ``try`` so a bad value is still logged, not raised.
        account_id_int = int(account_id) if account_id else 0

        async with pool.acquire() as conn:
            await conn.execute(
                '''
                INSERT INTO t_wechat_articles
                (title, source, url, publish_time, content, source_id)
                VALUES ($1, $2, $3, $4, $5, $6)
                ''',
                article_title, account_name, article_url,
                publish_time, content, account_id_int,  # source_id is passed as an int
            )
    except Exception as e:
        # logging.exception emits at ERROR level like the previous
        # logging.error call, but also records the traceback so failed
        # inserts can be diagnosed.
        logging.exception(f"保存文章失败: {e}")
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
# 从文件cookies.txt中获取
|
|
|
|
|
with open('cookies.txt', 'r', encoding='utf-8') as f:
|
|
|
|
@ -157,17 +171,26 @@ if __name__ == '__main__':
|
|
|
|
|
fakeid_list = query_fakeid_response.json().get('app_msg_list')
|
|
|
|
|
|
|
|
|
|
for item in fakeid_list:
|
|
|
|
|
# 采集item示例
|
|
|
|
|
article_url = item.get('link')
|
|
|
|
|
article_title = item.get('title')
|
|
|
|
|
publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S')
|
|
|
|
|
publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time")))
|
|
|
|
|
|
|
|
|
|
# 直接获取并显示文章内容
|
|
|
|
|
if '试卷' in article_title: # 过滤掉试卷
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
print(f"正在处理文章: {article_title} ({publish_time})")
|
|
|
|
|
content = get_article_content(article_url)
|
|
|
|
|
print(f"文章内容预览: {content[:200]}...")
|
|
|
|
|
|
|
|
|
|
loop = asyncio.new_event_loop()
|
|
|
|
|
asyncio.set_event_loop(loop)
|
|
|
|
|
try:
|
|
|
|
|
pool = loop.run_until_complete(init_postgres_pool())
|
|
|
|
|
loop.run_until_complete(
|
|
|
|
|
save_article_to_db(pool, article_title, account_name, article_url, publish_time, content,
|
|
|
|
|
account_id))
|
|
|
|
|
finally:
|
|
|
|
|
loop.run_until_complete(pool.close())
|
|
|
|
|
loop.close()
|
|
|
|
|
|
|
|
|
|
time.sleep(1)
|
|
|
|
|
# 关闭浏览器
|
|
|
|
|