main
HuangHai 1 week ago
parent af62427756
commit 7101b19cb7

@ -47,6 +47,20 @@ from selenium import webdriver
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service as ChromeService from selenium.webdriver.chrome.service import Service as ChromeService
async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, account_id):
    """Insert one article row into the ``t_wechat_articles`` table.

    A connection is taken from ``pool`` for the duration of the insert.
    Any exception raised while acquiring the connection, converting
    ``account_id`` or executing the statement is logged with
    ``logging.error`` and not re-raised, so the coroutine always
    returns ``None``.

    Args:
        pool: Object whose ``acquire()`` returns an async context manager
            yielding a connection with an awaitable ``execute(sql, *args)``.
        article_title: Value bound to the ``title`` column.
        account_name: Value bound to the ``source`` column.
        article_url: Value bound to the ``url`` column.
        publish_time: Value bound to the ``publish_time`` column.
        content: Value bound to the ``content`` column.
        account_id: Value bound to the ``source_id`` column after ``int()``
            conversion; a falsy value is stored as ``0``.
    """
    insert_sql = '''
                INSERT INTO t_wechat_articles
                (title, source, url, publish_time, content, source_id)
                VALUES ($1, $2, $3, $4, $5, $6)
            '''
    try:
        async with pool.acquire() as connection:
            # Make sure source_id is bound as an integer (0 when account_id is falsy).
            if account_id:
                source_id = int(account_id)
            else:
                source_id = 0
            row_values = (
                article_title,
                account_name,
                article_url,
                publish_time,
                content,
                source_id,
            )
            await connection.execute(insert_sql, *row_values)
    except Exception as exc:
        logging.error(f"保存文章失败: {exc}")
if __name__ == '__main__': if __name__ == '__main__':
# 从文件cookies.txt中获取 # 从文件cookies.txt中获取
with open('cookies.txt', 'r', encoding='utf-8') as f: with open('cookies.txt', 'r', encoding='utf-8') as f:
@ -157,17 +171,26 @@ if __name__ == '__main__':
fakeid_list = query_fakeid_response.json().get('app_msg_list') fakeid_list = query_fakeid_response.json().get('app_msg_list')
for item in fakeid_list: for item in fakeid_list:
# 采集item示例
article_url = item.get('link') article_url = item.get('link')
article_title = item.get('title') article_title = item.get('title')
publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S') publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time")))
# 直接获取并显示文章内容 if '试卷' in article_title: # 过滤掉试卷
if '试卷' in article_title: # 过滤掉试卷
continue continue
print(f"正在处理文章: {article_title} ({publish_time})") print(f"正在处理文章: {article_title} ({publish_time})")
content = get_article_content(article_url) content = get_article_content(article_url)
print(f"文章内容预览: {content[:200]}...")
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
pool = loop.run_until_complete(init_postgres_pool())
loop.run_until_complete(
save_article_to_db(pool, article_title, account_name, article_url, publish_time, content,
account_id))
finally:
loop.run_until_complete(pool.close())
loop.close()
time.sleep(1) time.sleep(1)
# 关闭浏览器 # 关闭浏览器

Loading…
Cancel
Save