diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py index a72455c0..7069a9c1 100644 --- a/dsLightRag/WxGzh/T2_CollectArticle.py +++ b/dsLightRag/WxGzh/T2_CollectArticle.py @@ -75,10 +75,7 @@ async def is_article_exist(pool, article_url): async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, source_id): - # 先检查文章是否已存在 - if await is_article_exist(pool, article_url): - logger.info(f"文章已存在,跳过保存: {account_name}-{article_title}") - return + # 在这里调用 lightrag进行知识库构建 workspace = 'ChangChun' # 使用PG库后,这个是没有用的,但目前的项目代码要求必传,就写一个吧。 @@ -190,6 +187,10 @@ async def process_single_article(article_info, account_info, cookies, token): try: pool = await init_postgres_pool() + # 先检查文章是否已存在 + if await is_article_exist(pool, article_url): + logger.info(f'文章已存在,跳过保存: {account_info["account_name"]}-{article_title}') + return False content = get_article_content(article_url) await save_article_to_db(pool, article_title, account_info["account_name"], article_url, publish_time, content, account_info["id"])