|
|
|
@ -75,10 +75,7 @@ async def is_article_exist(pool, article_url):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, source_id):
|
|
|
|
|
# First, check whether the article already exists
|
|
|
|
|
if await is_article_exist(pool, article_url):
|
|
|
|
|
logger.info(f"文章已存在,跳过保存: {account_name}-{article_title}")
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
# Call LightRAG here to build the knowledge base
|
|
|
|
|
workspace = 'ChangChun'
|
|
|
|
|
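# With the PG (PostgreSQL) storage backend this value is not actually used, but the current project code still requires it to be passed, so supply one anyway.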
|
|
|
|
@ -190,6 +187,10 @@ async def process_single_article(article_info, account_info, cookies, token):
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
pool = await init_postgres_pool()
|
|
|
|
|
# First, check whether the article already exists
|
|
|
|
|
if await is_article_exist(pool, article_url):
|
|
|
|
|
logger.info(f'文章已存在,跳过保存: {account_info["account_name"]}-{article_title}')
|
|
|
|
|
return False
|
|
|
|
|
content = get_article_content(article_url)
|
|
|
|
|
await save_article_to_db(pool, article_title, account_info["account_name"],
|
|
|
|
|
article_url, publish_time, content, account_info["id"])
|
|
|
|
|