From b4d13486ac77d6810b022f1edcc88f01f119c706 Mon Sep 17 00:00:00 2001
From: "Kalman.CHENG" <123204464@qq.com>
Date: Wed, 16 Jul 2025 08:32:10 +0800
Subject: [PATCH 01/17] =?UTF-8?q?=E6=95=99=E8=82=B2=E5=9E=82=E7=9B=B4?=
 =?UTF-8?q?=E9=A2=86=E5=9F=9F=E5=A4=A7=E6=A8=A1=E5=9E=8B=E5=B9=B3=E5=8F=B0?=
 =?UTF-8?q?=20modify=20by=20Kalman.CHENG=20=E2=98=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../{QuestionController.py => TeachingModelController.py}     | 2 +-
 dsAiTeachingModel/main.py                                     | 2 +-
 dsAiTeachingModel/routes/__init__.py                          | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)
 rename dsAiTeachingModel/api/controller/{QuestionController.py => TeachingModelController.py} (90%)

diff --git a/dsAiTeachingModel/api/controller/QuestionController.py b/dsAiTeachingModel/api/controller/TeachingModelController.py
similarity index 90%
rename from dsAiTeachingModel/api/controller/QuestionController.py
rename to dsAiTeachingModel/api/controller/TeachingModelController.py
index 48b7ed39..881472e5 100644
--- a/dsAiTeachingModel/api/controller/QuestionController.py
+++ b/dsAiTeachingModel/api/controller/TeachingModelController.py
@@ -1,4 +1,4 @@
-# routes/QuestionController.py
+# routes/TeachingModelController.py
 
 from fastapi import APIRouter, Request, Response, Depends
 from auth.dependencies import *
diff --git a/dsAiTeachingModel/main.py b/dsAiTeachingModel/main.py
index 8f99a901..d7b3cfdc 100644
--- a/dsAiTeachingModel/main.py
+++ b/dsAiTeachingModel/main.py
@@ -52,7 +52,7 @@ app.include_router(theme_router, prefix="/api/theme", tags=["theme"])
 # 文档相关
 app.include_router(document_router, prefix="/api/document", tags=["document"])
 # 问题相关（大模型应用）
-app.include_router(question_router, prefix="/api/question", tags=["question"])
+app.include_router(teaching_model_router, prefix="/api/teaching/model", tags=["question"])
 # 字典相关(Dm)
 app.include_router(dm_router, prefix="/api/dm", tags=["dm"])
 # 测试相关
diff --git a/dsAiTeachingModel/routes/__init__.py b/dsAiTeachingModel/routes/__init__.py
index 4fa720b9..f985e11c 100644
--- a/dsAiTeachingModel/routes/__init__.py
+++ b/dsAiTeachingModel/routes/__init__.py
@@ -2,10 +2,10 @@
 from api.controller.LoginController import router as login_router
 from api.controller.DocumentController import router as document_router
 from api.controller.ThemeController import router as theme_router
-from api.controller.QuestionController import router as question_router
+from api.controller.TeachingModelController import router as teaching_model_router
 from api.controller.TestController import router as test_router
 from api.controller.DmController import router as dm_router
 from api.controller.UserController import router as user_router
 
 # 导出所有路由
-__all__ = ["login_router", "document_router", "theme_router", "question_router", "dm_router", "test_router", "user_router"]
+__all__ = ["login_router", "document_router", "theme_router", "teaching_model_router", "dm_router", "test_router", "user_router"]

From ea48a15de7d3b4eb794b41cc2c713cb0c8ed815d Mon Sep 17 00:00:00 2001
From: "Kalman.CHENG" <123204464@qq.com>
Date: Wed, 16 Jul 2025 08:37:02 +0800
Subject: [PATCH 02/17] =?UTF-8?q?=E6=95=99=E8=82=B2=E5=9E=82=E7=9B=B4?=
 =?UTF-8?q?=E9=A2=86=E5=9F=9F=E5=A4=A7=E6=A8=A1=E5=9E=8B=E5=B9=B3=E5=8F=B0?=
 =?UTF-8?q?=20modify=20by=20Kalman.CHENG=20=E2=98=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 dsAiTeachingModel/{config => config1}/Config.py   | 0
 dsAiTeachingModel/{config => config1}/__init__.py | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename dsAiTeachingModel/{config => config1}/Config.py (100%)
 rename dsAiTeachingModel/{config => config1}/__init__.py (100%)

diff --git a/dsAiTeachingModel/config/Config.py b/dsAiTeachingModel/config1/Config.py
similarity index 100%
rename from dsAiTeachingModel/config/Config.py
rename to dsAiTeachingModel/config1/Config.py
diff --git a/dsAiTeachingModel/config/__init__.py b/dsAiTeachingModel/config1/__init__.py
similarity index 100%
rename from dsAiTeachingModel/config/__init__.py
rename to dsAiTeachingModel/config1/__init__.py

From 9ecaf901641b502992be68c4c2a525d654e69537 Mon Sep 17 00:00:00 2001
From: "Kalman.CHENG" <123204464@qq.com>
Date: Wed, 16 Jul 2025 08:37:30 +0800
Subject: [PATCH 03/17] =?UTF-8?q?=E6=95=99=E8=82=B2=E5=9E=82=E7=9B=B4?=
 =?UTF-8?q?=E9=A2=86=E5=9F=9F=E5=A4=A7=E6=A8=A1=E5=9E=8B=E5=B9=B3=E5=8F=B0?=
 =?UTF-8?q?=20modify=20by=20Kalman.CHENG=20=E2=98=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 dsAiTeachingModel/{config1 => Config}/Config.py   | 0
 dsAiTeachingModel/{config1 => Config}/__init__.py | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename dsAiTeachingModel/{config1 => Config}/Config.py (100%)
 rename dsAiTeachingModel/{config1 => Config}/__init__.py (100%)

diff --git a/dsAiTeachingModel/config1/Config.py b/dsAiTeachingModel/Config/Config.py
similarity index 100%
rename from dsAiTeachingModel/config1/Config.py
rename to dsAiTeachingModel/Config/Config.py
diff --git a/dsAiTeachingModel/config1/__init__.py b/dsAiTeachingModel/Config/__init__.py
similarity index 100%
rename from dsAiTeachingModel/config1/__init__.py
rename to dsAiTeachingModel/Config/__init__.py

From 75c06f31eb7974bef57066b6a11d15e785670b54 Mon Sep 17 00:00:00 2001
From: "Kalman.CHENG" <123204464@qq.com>
Date: Wed, 16 Jul 2025 09:06:08 +0800
Subject: [PATCH 04/17] =?UTF-8?q?=E6=95=99=E8=82=B2=E5=9E=82=E7=9B=B4?=
 =?UTF-8?q?=E9=A2=86=E5=9F=9F=E5=A4=A7=E6=A8=A1=E5=9E=8B=E5=B9=B3=E5=8F=B0?=
 =?UTF-8?q?=20modify=20by=20Kalman.CHENG=20=E2=98=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 dsAiTeachingModel/main.py                  | 2 +-
 dsAiTeachingModel/tasks/BackgroundTasks.py | 6 ++++--
 dsAiTeachingModel/utils/Database.py        | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/dsAiTeachingModel/main.py b/dsAiTeachingModel/main.py
index d7b3cfdc..72613efa 100644
--- a/dsAiTeachingModel/main.py
+++ b/dsAiTeachingModel/main.py
@@ -23,7 +23,7 @@ async def lifespan(app: FastAPI):
     await init_database()
 
     # 启动异步任务
-    asyncio.create_task(train_document_task())
+    # asyncio.create_task(train_document_task())
 
     yield
     await shutdown_database()
diff --git a/dsAiTeachingModel/tasks/BackgroundTasks.py b/dsAiTeachingModel/tasks/BackgroundTasks.py
index d43dc190..b5eb4a24 100644
--- a/dsAiTeachingModel/tasks/BackgroundTasks.py
+++ b/dsAiTeachingModel/tasks/BackgroundTasks.py
@@ -12,11 +12,13 @@ WORKING_DIR = f"./output"
 # 后台任务，监控是否有新的未训练的文档进行训练
 async def train_document_task():
     print("线程5秒后开始运行【监控是否有新的未训练的文档进行训练】")
+    num = 1
     await asyncio.sleep(5)  # 使用 asyncio.sleep 而不是 time.sleep
     # 这里放置你的线程逻辑
     while True:
         # 这里可以放置你的线程要执行的代码
-        logging.info("开始查询是否有未训练的文档")
+        logging.info("开始查询是否有未训练的文档：" + str(num))
+        num =  num + 1
         no_train_document_sql: str = " SELECT * FROM t_ai_teaching_model_document WHERE is_deleted = 0 and train_flag = 0 ORDER BY create_time DESC"
         no_train_document_result = await find_by_sql(no_train_document_sql, ())
         if not no_train_document_result:
@@ -49,4 +51,4 @@ async def train_document_task():
             # execute_sql(update_sql)
 
             # 添加适当的等待时间，避免频繁查询
-            await asyncio.sleep(60)  # 每分钟查询一次
+        await asyncio.sleep(60)  # 每分钟查询一次
diff --git a/dsAiTeachingModel/utils/Database.py b/dsAiTeachingModel/utils/Database.py
index 4ac15243..4010390d 100644
--- a/dsAiTeachingModel/utils/Database.py
+++ b/dsAiTeachingModel/utils/Database.py
@@ -17,7 +17,7 @@ async def create_pool():
         password=POSTGRES_PASSWORD,
         database=POSTGRES_DATABASE,
         min_size=1,  # 设置连接池最小连接数
-        max_size=100  # 设置连接池最大连接数
+        max_size=10  # 设置连接池最大连接数
     )
 
 async def get_connection():

From 76d0c09bddc2ac2e4909b570e4c0c055ce2b20df Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 09:09:21 +0800
Subject: [PATCH 05/17] 'commit'

---
 dsLightRag/.idea/dsLightRag.iml               |   2 +-
 dsLightRag/.idea/misc.xml                     |   2 +-
 .../Test/Test/Logs/article_bfc50bb7d7.html    | 162 ------------------
 dsLightRag/WxGzh/T2_CollectArticle.py         |  41 ++---
 4 files changed, 19 insertions(+), 188 deletions(-)
 delete mode 100644 dsLightRag/Test/Test/Logs/article_bfc50bb7d7.html

diff --git a/dsLightRag/.idea/dsLightRag.iml b/dsLightRag/.idea/dsLightRag.iml
index 4ceb6f94..880d61c1 100644
--- a/dsLightRag/.idea/dsLightRag.iml
+++ b/dsLightRag/.idea/dsLightRag.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Python 3.10 (4)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="D:\anaconda3\envs\py310" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyDocumentationSettings">
diff --git a/dsLightRag/.idea/misc.xml b/dsLightRag/.idea/misc.xml
index 0bad5868..0f9b3bc1 100644
--- a/dsLightRag/.idea/misc.xml
+++ b/dsLightRag/.idea/misc.xml
@@ -3,5 +3,5 @@
   <component name="Black">
     <option name="sdkName" value="D:\anaconda3\envs\lightrag" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (4)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="D:\anaconda3\envs\py310" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/dsLightRag/Test/Test/Logs/article_bfc50bb7d7.html b/dsLightRag/Test/Test/Logs/article_bfc50bb7d7.html
deleted file mode 100644
index cd460649..00000000
--- a/dsLightRag/Test/Test/Logs/article_bfc50bb7d7.html
+++ /dev/null
@@ -1,162 +0,0 @@
-<!DOCTYPE html>
-<html>
-    <head>
-        <meta http-equiv=Content-Type content="text/html;charset=utf-8">
-<meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,user-scalable=0,viewport-fit=cover">
-<meta name="color-scheme" content="light dark">
-<meta name="apple-mobile-web-app-capable" content="yes">
-<meta name="apple-mobile-web-app-status-bar-style" content="black">
-<meta name="format-detection" content="telephone=no">
-<link rel="shortcut icon" type="image/x-icon" href="//res.wx.qq.com/a/wx_fed/assets/res/NTI4MWU5.ico">
-<link rel="mask-icon" href="//res.wx.qq.com/a/wx_fed/assets/res/MjliNWVm.svg" color="#4C4C4C">
-<link rel="apple-touch-icon-precomposed" href="//res.wx.qq.com/a/wx_fed/assets/res/OTE0YTAw.png">
-
-        <title></title>
-        <script>
-        (() => {
-            
-            const ua = navigator.userAgent;
-            const noMobile = !(/(iPhone|iPad|iPod|iOS)/i.test(ua) || /Windows\sPhone/i.test(ua) || /(Android)/i.test(ua));
-            setTimeout(() => {
-                noMobile && document.title === '' && (document.title = '微信公众平台');
-            }, 1000);
-        })();
-        </script>
-        
-<link rel="stylesheet" type="text/css" href="//res.wx.qq.com/t/wx_fed/weui-source/res/2.5.14/weui.min.css">
-
-<link rel="stylesheet" href="//res.wx.qq.com/mmbizwap/zh_CN/htmledition/style/page/secitptpage/verify7729a9.css" media="all">
-<script type="text/javascript">
-  
-</script>
-
-    </head>
-    <body class="zh_CN " ontouchstart="">
-         
-<div class="weui-msg">
-  <div id="tips" style="display:none;" class="top_tips warning"></div>
-      </div>
-
-<script type="text/javascript">
-var PAGE_MID='mmbizwap:secitptpage/verify.html';
-</script>
-        
-        <style>body,.wx-root{--weui-BG-0:#ededed;--weui-BG-1:#f7f7f7;--weui-BG-2:#fff;--weui-BG-3:#f7f7f7;--weui-BG-4:#4c4c4c;--weui-BG-5:#fff;--weui-FG-0:rgba(0,0,0,0.9);--weui-FG-HALF:rgba(0,0,0,0.9);--weui-FG-1:rgba(0,0,0,0.5);--weui-FG-2:rgba(0,0,0,0.3);--weui-FG-3:rgba(0,0,0,0.1);--weui-FG-4:rgba(0,0,0,0.15);--weui-FG-5:rgba(0,0,0,0.05);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#fa9d3b;--weui-YELLOW:#ffc300;--weui-GREEN:#91d300;--weui-LIGHTGREEN:#95ec69;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1485ee;--weui-PURPLE:#6467f0;--weui-WHITE:#fff;--weui-LINK:#576b95;--weui-TEXTGREEN:#06ae56;--weui-FG:#000;--weui-BG:#fff;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:#fa9d3b;--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:#06ae56;--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:#10aeff;--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(0,0,0,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(0,0,0,0.05)}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-BG-0:#111;--weui-BG-1:#1e1e1e;--weui-BG-2:#191919;--weui-BG-3:#202020;--weui-BG-4:#404040;--weui-BG-5:#2c2c2c;--weui-FG-0:rgba(255,255,255,0.8);--weui-FG-HALF:rgba(255,255,255,0.6);--weui-FG-1:rgba(255,255,255,0.5);--weui-FG-2:rgba(255,255,255,0.3);--weui-FG-3:rgba(255,255,255,0.1);--weui-FG-4:rgba(255,255,255,0.15);--weui-FG-5:rgba(255,255,255,0.1);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#c87d2f;--weui-YELLOW:#cc9c00;--weui-GREEN:#74a800;--weui-LIGHTGREEN:#3eb575;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1196ff;--weui-PURPLE:#8183ff;--weui-WHITE:rgba(255,255,255,0.8);--weui-LINK:#7d90a9;--weui-TEXTGREEN:#259c5c;--weui-FG:#fff;--weui-BG:#000;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:rgba(250,157,59,0.6);--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:rgba(6,174,86,0.6);--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:rgba(16,174,255,0.6);--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(255,255,255,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(255,255,255,0.05)}}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-BG-0:#111;--weui-BG-1:#1e1e1e;--weui-BG-2:#191919;--weui-BG-3:#202020;--weui-BG-4:#404040;--weui-BG-5:#2c2c2c;--weui-FG-0:rgba(255,255,255,0.8);--weui-FG-HALF:rgba(255,255,255,0.6);--weui-FG-1:rgba(255,255,255,0.5);--weui-FG-2:rgba(255,255,255,0.3);--weui-FG-3:rgba(255,255,255,0.1);--weui-FG-4:rgba(255,255,255,0.15);--weui-FG-5:rgba(255,255,255,0.1);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#c87d2f;--weui-YELLOW:#cc9c00;--weui-GREEN:#74a800;--weui-LIGHTGREEN:#3eb575;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1196ff;--weui-PURPLE:#8183ff;--weui-WHITE:rgba(255,255,255,0.8);--weui-LINK:#7d90a9;--weui-TEXTGREEN:#259c5c;--weui-FG:#fff;--weui-BG:#000;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:rgba(250,157,59,0.6);--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:rgba(6,174,86,0.6);--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:rgba(16,174,255,0.6);--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(255,255,255,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(255,255,255,0.05)}.wx-root[data-weui-mode='care'],body[data-weui-mode='care']{--weui-BG-0:#ededed;--weui-BG-1:#f7f7f7;--weui-BG-2:#fff;--weui-BG-3:#f7f7f7;--weui-BG-4:#4c4c4c;--weui-BG-5:#fff;--weui-FG-0:#000;--weui-FG-HALF:#000;--weui-FG-1:rgba(0,0,0,0.6);--weui-FG-2:rgba(0,0,0,0.42);--weui-FG-3:rgba(0,0,0,0.1);--weui-FG-4:rgba(0,0,0,0.15);--weui-FG-5:rgba(0,0,0,0.05);--weui-RED:#dc3636;--weui-REDORANGE:#ff6146;--weui-ORANGE:#e17719;--weui-YELLOW:#bb8e00;--weui-GREEN:#4f8400;--weui-LIGHTGREEN:#2e8800;--weui-BRAND:#018942;--weui-BLUE:#007dbb;--weui-INDIGO:#0075e2;--weui-PURPLE:#6265f1;--weui-WHITE:#fff;--weui-LINK:#576b95;--weui-TEXTGREEN:#06ae56;--weui-FG:#000;--weui-BG:#fff;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:#e17719;--weui-TAG-BACKGROUND-ORANGE:rgba(225,119,25,0.1);--weui-TAG-TEXT-GREEN:#06ae56;--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:#007dbb;--weui-TAG-BACKGROUND-BLUE:rgba(0,125,187,0.1);--weui-TAG-TEXT-BLACK:rgba(0,0,0,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(0,0,0,0.05)}@media(prefers-color-scheme:dark){.wx-root[data-weui-mode='care']:not([data-weui-theme='light']),body[data-weui-mode='care']:not([data-weui-theme='light']){--weui-BG-0:#111;--weui-BG-1:#1e1e1e;--weui-BG-2:#191919;--weui-BG-3:#202020;--weui-BG-4:#404040;--weui-BG-5:#2c2c2c;--weui-FG-0:rgba(255,255,255,0.85);--weui-FG-HALF:rgba(255,255,255,0.65);--weui-FG-1:rgba(255,255,255,0.55);--weui-FG-2:rgba(255,255,255,0.35);--weui-FG-3:rgba(255,255,255,0.1);--weui-FG-4:rgba(255,255,255,0.15);--weui-FG-5:rgba(255,255,255,0.1);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#c87d2f;--weui-YELLOW:#cc9c00;--weui-GREEN:#74a800;--weui-LIGHTGREEN:#3eb575;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1196ff;--weui-PURPLE:#8183ff;--weui-WHITE:rgba(255,255,255,0.8);--weui-LINK:#7d90a9;--weui-TEXTGREEN:#259c5c;--weui-FG:#fff;--weui-BG:#000;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:rgba(250,157,59,0.6);--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:rgba(6,174,86,0.6);--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:rgba(16,174,255,0.6);--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(255,255,255,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(255,255,255,0.05)}}.wx-root[data-weui-mode='care'][data-weui-theme='dark'],body[data-weui-mode='care'][data-weui-theme='dark']{--weui-BG-0:#111;--weui-BG-1:#1e1e1e;--weui-BG-2:#191919;--weui-BG-3:#202020;--weui-BG-4:#404040;--weui-BG-5:#2c2c2c;--weui-FG-0:rgba(255,255,255,0.85);--weui-FG-HALF:rgba(255,255,255,0.65);--weui-FG-1:rgba(255,255,255,0.55);--weui-FG-2:rgba(255,255,255,0.35);--weui-FG-3:rgba(255,255,255,0.1);--weui-FG-4:rgba(255,255,255,0.15);--weui-FG-5:rgba(255,255,255,0.1);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#c87d2f;--weui-YELLOW:#cc9c00;--weui-GREEN:#74a800;--weui-LIGHTGREEN:#3eb575;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1196ff;--weui-PURPLE:#8183ff;--weui-WHITE:rgba(255,255,255,0.8);--weui-LINK:#7d90a9;--weui-TEXTGREEN:#259c5c;--weui-FG:#fff;--weui-BG:#000;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:rgba(250,157,59,0.6);--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:rgba(6,174,86,0.6);--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:rgba(16,174,255,0.6);--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(255,255,255,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(255,255,255,0.05)}.wx-root,body{--weui-BG-COLOR-ACTIVE:#ececec}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-BG-COLOR-ACTIVE:#373737}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-BG-COLOR-ACTIVE:#373737}}body,.wx-root,page{--weui-BTN-HEIGHT:48;--weui-BTN-HEIGHT-MEDIUM:40;--weui-BTN-HEIGHT-SMALL:32}.wx-root,body{--weui-BTN-ACTIVE-MASK:rgba(0,0,0,0.1)}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-BTN-ACTIVE-MASK:rgba(255,255,255,0.1)}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-BTN-ACTIVE-MASK:rgba(255,255,255,0.1)}}.wx-root,body{--weui-BTN-DEFAULT-ACTIVE-BG:#e6e6e6}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-BTN-DEFAULT-ACTIVE-BG:rgba(255,255,255,0.126)}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-BTN-DEFAULT-ACTIVE-BG:rgba(255,255,255,0.126)}}.wx-root,body{--weui-DIALOG-LINE-COLOR:rgba(0,0,0,0.1)}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-DIALOG-LINE-COLOR:rgba(255,255,255,0.1)}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-DIALOG-LINE-COLOR:rgba(255,255,255,0.1)}}.weui-hidden_abs{opacity:0;position:absolute;width:1px;height:1px;overflow:hidden}.weui-a11y_ref{display:none}.weui-hidden-space:empty:before{content:"\00A0";position:absolute;width:1px;height:1px;overflow:hidden}.weui-a11y-combo{position:relative}.weui-a11y-combo__helper{opacity:0;position:absolute;width:100%;height:100%;overflow:hidden}.weui-a11y-combo__content{position:relative;z-index:1}.weui-wa-hotarea-el{position:absolute;top:50%;left:50%;-webkit-transform:translate(-50%,-50%);transform:translate(-50%,-50%);min-width:44px;min-height:44px;width:100%;height:100%}.weui-wa-hotarea-el__wrp,.weui-wa-hotarea,.weui-wa-hotarea_before{position:relative}.weui-wa-hotarea-el__wrp a,.weui-wa-hotarea a,.weui-wa-hotarea_before a,.weui-wa-hotarea-el__wrp button,.weui-wa-hotarea button,.weui-wa-hotarea_before button,.weui-wa-hotarea-el__wrp navigator,.weui-wa-hotarea navigator,.weui-wa-hotarea_before navigator{position:relative;z-index:1}.weui-wa-hotarea_before:before,.weui-wa-hotarea:after{content:"";pointer-events:auto;position:absolute;top:50%;left:50%;-webkit-transform:translate(-50%,-50%);transform:translate(-50%,-50%);min-width:44px;min-height:44px;width:100%;height:100%}.test{color:red}body .weui-hidden_abs,body .aria_hidden_abs{position:absolute;opacity:0;overflow:hidden;left:auto;width:1px;height:1px}body .weui-a11y_ref{display:none}body .weui-wa-hotarea_before:before,body .weui-wa-hotarea:after{pointer-events:auto}body .weui-wa-hotarea-el__wrp button{position:relative;z-index:1}</style>
-<script nonce="" type="text/javascript" src="//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/lib/wa-helper.min7729a9.js"></script>
-<script nonce="" type="text/javascript">
-  
-</script>
-
-
-
-<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_colon">：</span>
-<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_comma">，</span>
-<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_period">。</span>
-<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_space">&nbsp;</span>
-
-
-<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_type_video">视频</span>
-<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_type_weapp">小程序</span>
-
-
-<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_zan_btn_txt">赞</span>
-<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_zan_btn_tips">，轻点两下取消赞</span>
-<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_like_btn_txt">在看</span>
-<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_like_btn_tips">，轻点两下取消在看</span>
-
-        <script nonce="">
-	var __DEBUGINFO = {
-		debug_js: "//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/debug/console7729a9.js",
-		safe_js: "//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/safe/moonsafe7729a9.js",
-		res_list: []
-	};
-</script>
-
-<script nonce="" type="text/javascript">
-	(function () {
-		var totalCount = 0,
-			finishCount = 0;
-
-		function _loadVConsolePlugin() {
-			window.vConsole = new window.VConsole();
-			while (window.vConsolePlugins.length > 0) {
-				var p = window.vConsolePlugins.shift();
-				window.vConsole.addPlugin(p);
-			}
-			
-			if (!window.vConsole.isInited) {
-				window.vConsole._render();
-				window.vConsole._mockTap();
-				window.vConsole._bindEvent();
-				window.vConsole._autoRun();
-			}
-		}
-
-		function _addScript(uri, cb) {
-			totalCount++;
-			var node = document.createElement('SCRIPT');
-			node.type = 'text/javascript';
-			node.src = uri;
-			node.setAttribute('nonce', '');
-			if (cb) {
-				node.onload = cb;
-			}
-			document.getElementsByTagName('head')[0].appendChild(node);
-		}
-		if (
-			(document.cookie && document.cookie.indexOf('vconsole_open=1') > -1)
-			|| location.href.indexOf('vconsole=1') > -1
-		) {
-			window.vConsolePlugins = [];
-			_addScript('//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/vconsole/3.2.2/vconsole.min7729a7.js', function () {
-				
-				_addScript('//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/vconsole/plugin/vconsole-mpopt/1.0.1/vconsole-mpopt7729a7.js', _loadVConsolePlugin);
-			});
-		}
-		if (document.cookie && document.cookie.indexOf('__xweb_remote_debug_device_token__') > -1) {
-			_addScript('https://mp.weixin.qq.com/mmbizappmsg/zh_CN/htmledition/js/scripts/mprdev-0.2.5.js', function () {
-				_addScript('https://mp.weixin.qq.com/mmbizappmsg/zh_CN/htmledition/js/scripts/xwebrd-0.0.2.js');
-			});
-		}
-
-		
-		try {
-			var adIframeUrl = localStorage.getItem('__WXLS_ad_iframe_url');
-			if (window === top) {
-				if (adIframeUrl) {
-					if (navigator.userAgent.indexOf('iPhone') > -1) {
-						var img = new Image();
-						img.src = adIframeUrl;
-					} else {
-						var link = document.createElement('link');
-						link.rel = 'prefetch';
-						link.href = adIframeUrl;
-						document.getElementsByTagName('head')[0].appendChild(link);
-					}
-				}
-			}
-		} catch (err) {
-
-		}
-
-	})();
-</script>
-        <script>window.__moon_host = 'res.wx.qq.com';window.moon_map = {"biz_wap/utils/ajax_wx.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/utils/ajax_wx7729a9.js","biz_common/utils/respTypes.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_common/utils/respTypes7729a9.js","biz_common/utils/url/parse.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_common/utils/url/parse7729a9.js","biz_common/utils/string/html.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_common/utils/string/html7729a9.js","common/color/dark.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/common/color/dark7729a9.js","common/color/light.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/common/color/light7729a9.js","biz_wap/utils/mmversion.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/utils/mmversion7729a9.js","biz_wap/jsapi/mpapp_core.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/jsapi/mpapp_core7729a9.js","complain/tips.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/complain/tips7729a9.js","biz_common/dom/event.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_common/dom/event7729a9.js","biz_wap/utils/ajax.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/utils/ajax7729a9.js","common/color/background_color.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/common/color/background_color7729a9.js","biz_wap/jsapi/core.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/jsapi/core7729a9.js","secitptpage/template/verify.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/secitptpage/template/verify7729a9.js"};</script><script type="text/javascript">window.__wxgspeeds={}; window.__wxgspeeds.moonloadtime=+new Date()</script><script  type="text/javascript" src="//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/moon7729a9.js"></script>
-<script type="text/javascript" src="https://captcha.gtimg.com/TCaptcha.js"></script>
-<script type="text/javascript">
-window.cgiData = {
-    register_code : "0" * 1,
-    target_url : "https://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==\x26amp;mid=2651526157\x26amp;idx=1\x26amp;sn=bfc50bb7d76462dfe1dd412fceb73b97\x26amp;chksm=84e1ab9fb39622894678cdbb13c5262262a0469cb1cf4326ac1f13b66af9a2e0d562f182174a\x26amp;nwr_flag=1",
-    cap_appid : "2003810213",
-    cap_sid : "4508590775431679609",
-    poc_sid : "HM2zdGijbOCHWQ9Y4bEnUfRRdmr12wCGRZr2tawo",
-    timeout_ms : "30000" * 1,
-    poc_token : "HM2zdGijv176ir9lsab2gSXz7Gxxv32lrfdB-Bxd",
-}
-console.log('window.cgiData', window.cgiData);
-seajs.use('secitptpage/template/verify.js')
-</script>
-
-    </body>
-</html>
-
diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py
index a6e073bb..de048fb6 100644
--- a/dsLightRag/WxGzh/T2_CollectArticle.py
+++ b/dsLightRag/WxGzh/T2_CollectArticle.py
@@ -4,21 +4,34 @@
 # 微信爬爬猫---公众号文章抓取代码分析
 # https://blog.csdn.net/yajuanpi4899/article/details/121584268
 
+"""
+# 查看selenium版本
+pip show selenium
+4.34.2
 
+# 查看Chrome浏览器版本
+chrome://version/
+138.0.7204.101 (正式版本) （64 位）
+
+# 下载驱动包
+https://googlechromelabs.github.io/chrome-for-testing/
+https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
+"""
 import asyncio
 import datetime
 import json
 import logging
 import random
 import re
-
+import time
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service as ChromeService
 import requests
-
 from Util.PostgreSQLUtil import init_postgres_pool
 from Util.WxGzhUtil import init_wechat_browser, get_article_content
 
 # 删除重复的日志配置，只保留以下内容
-logger = logging.getLogger('WeiXinGongZhongHao')
+logger = logging.getLogger('WxGzh')
 logger.setLevel(logging.INFO)
 
 # 确保只添加一个handler
@@ -27,7 +40,6 @@ if not logger.handlers:
     handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
     logger.addHandler(handler)
 
-
 async def get_wechat_sources():
     """从t_wechat_source表获取微信公众号列表"""
     try:
@@ -39,32 +51,13 @@ async def get_wechat_sources():
         await pool.close()
 
 
-"""
-# 查看selenium版本
-pip show selenium
-4.34.2
-
-# 查看Chrome浏览器版本
-chrome://version/
-138.0.7204.101 (正式版本) （64 位）
-
-# 下载驱动包
-https://googlechromelabs.github.io/chrome-for-testing/
-https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
-"""
-import time
-from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.chrome.service import Service as ChromeService
-
 
 async def is_article_exist(pool, article_url):
     """检查文章URL是否已存在数据库中"""
     try:
         async with pool.acquire() as conn:
             row = await conn.fetchrow('''
-                                      SELECT 1
-                                      FROM t_wechat_articles
-                                      WHERE url = $1 LIMIT 1
+                                      SELECT 1 FROM t_wechat_articles WHERE url = $1 LIMIT 1
                                       ''', article_url)
             return row is not None
     except Exception as e:

From 642d3af0eaf0948449dd86778ead371918c553fe Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 09:10:35 +0800
Subject: [PATCH 06/17] 'commit'

---
 dsLightRag/WxGzh/T2_CollectArticle.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py
index de048fb6..d39b3539 100644
--- a/dsLightRag/WxGzh/T2_CollectArticle.py
+++ b/dsLightRag/WxGzh/T2_CollectArticle.py
@@ -89,7 +89,6 @@ if __name__ == '__main__':
         content = f.read()
     # 使用json还原为json对象
     cookies = json.loads(content)
-    # "expiry": 1787106233
     # 检查是否有过期时间
     expiry = cookies["expiry"]
     if expiry:
@@ -108,7 +107,7 @@ if __name__ == '__main__':
     logger.info(f"cookies的过期时间一般是4天，cookies过期时间：%s" % expiry_date)
     options = Options()
     options.add_argument('-headless')  # 无头参数，调试时可以注释掉
-    # 设置headers - 使用微信内置浏览器的User-Agent
+    # 设置headers
     header = {
         "HOST": "mp.weixin.qq.com",
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)",
@@ -121,8 +120,7 @@ if __name__ == '__main__':
     service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
     # 使用统一的初始化方式
     driver = init_wechat_browser()
-
-    # 方法3：使用requests库发送请求获取重定向URL
+    # 方法：使用requests库发送请求获取重定向URL
     url = 'https://mp.weixin.qq.com'
     response = requests.get(url=url, allow_redirects=False, cookies=cookies)
     if 'Location' in response.headers:
@@ -194,7 +192,7 @@ if __name__ == '__main__':
             article_title = item.get('title')
             publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time")))
 
-            if '试卷' in article_title:  # 过滤掉试卷
+            if '试卷' in article_title:  # 过滤掉试卷,致知物理中有大量试卷，我做教育资讯的不关心试卷
                 continue
 
             logger.info(f"正在处理文章: {article_title} ({publish_time})")

From 2e40111220da01e961c931bd2e802a167ec6e39d Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 09:11:14 +0800
Subject: [PATCH 07/17] 'commit'

---
 dsLightRag/WxGzh/T2_CollectArticle.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py
index d39b3539..23b89728 100644
--- a/dsLightRag/WxGzh/T2_CollectArticle.py
+++ b/dsLightRag/WxGzh/T2_CollectArticle.py
@@ -40,6 +40,7 @@ if not logger.handlers:
     handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
     logger.addHandler(handler)
 
+
 async def get_wechat_sources():
     """从t_wechat_source表获取微信公众号列表"""
     try:
@@ -51,13 +52,14 @@ async def get_wechat_sources():
         await pool.close()
 
 
-
 async def is_article_exist(pool, article_url):
     """检查文章URL是否已存在数据库中"""
     try:
         async with pool.acquire() as conn:
             row = await conn.fetchrow('''
-                                      SELECT 1 FROM t_wechat_articles WHERE url = $1 LIMIT 1
+                                      SELECT 1
+                                      FROM t_wechat_articles
+                                      WHERE url = $1 LIMIT 1
                                       ''', article_url)
             return row is not None
     except Exception as e:
@@ -125,11 +127,11 @@ if __name__ == '__main__':
     response = requests.get(url=url, allow_redirects=False, cookies=cookies)
     if 'Location' in response.headers:
         redirect_url = response.headers.get("Location")
-        logger.info(f"重定向URL:%s"%redirect_url)
+        logger.info(f"重定向URL:%s" % redirect_url)
         token_match = re.findall(r'token=(\d+)', redirect_url)
         if token_match:
             token = token_match[0]
-            logger.info(f"获取到的token:%s"%token)
+            logger.info(f"获取到的token:%s" % token)
 
     article_urls = []
 
@@ -203,8 +205,7 @@ if __name__ == '__main__':
             try:
                 pool = loop.run_until_complete(init_postgres_pool())
                 loop.run_until_complete(
-                    save_article_to_db(pool, article_title, account_name, article_url, publish_time, content,
-                                       id))
+                    save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, id))
             finally:
                 loop.run_until_complete(pool.close())
                 loop.close()

From 1180c615d65fe52ac4244dbad15abf080865250b Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 09:12:47 +0800
Subject: [PATCH 08/17] 'commit'

---
 dsLightRag/WxGzh/T2_CollectArticle.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py
index 23b89728..8aa7476a 100644
--- a/dsLightRag/WxGzh/T2_CollectArticle.py
+++ b/dsLightRag/WxGzh/T2_CollectArticle.py
@@ -67,7 +67,7 @@ async def is_article_exist(pool, article_url):
         return False  # 出错时默认返回False，避免影响正常流程
 
 
-async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, id):
+async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, source_id):
     # 先检查文章是否已存在
     if await is_article_exist(pool, article_url):
         logger.info(f"文章已存在，跳过保存: {article_url}")
@@ -80,7 +80,7 @@ async def save_article_to_db(pool, article_title, account_name, article_url, pub
                                    (title, source, url, publish_time, content, source_id)
                                VALUES ($1, $2, $3, $4, $5, $6)
                                ''', article_title, account_name, article_url,
-                               publish_time, content, id)
+                               publish_time, content, source_id)
     except Exception as e:
         logging.error(f"保存文章失败: {e}")
 
@@ -198,7 +198,10 @@ if __name__ == '__main__':
                 continue
 
             logger.info(f"正在处理文章: {article_title} ({publish_time})")
+
+            logger.info(f"正在获取文章: {article_title}内容...")
             content = get_article_content(article_url)
+            logger.info(f"成功获取文章: {article_title}内容。")
 
             loop = asyncio.new_event_loop()
             asyncio.set_event_loop(loop)

From 1c00fc476180b8407dbd8da026d0a0504ba066de Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 09:13:35 +0800
Subject: [PATCH 09/17] 'commit'

---
 dsLightRag/WxGzh/T2_CollectArticle.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py
index 8aa7476a..26ae2377 100644
--- a/dsLightRag/WxGzh/T2_CollectArticle.py
+++ b/dsLightRag/WxGzh/T2_CollectArticle.py
@@ -72,6 +72,8 @@ async def save_article_to_db(pool, article_title, account_name, article_url, pub
     if await is_article_exist(pool, article_url):
         logger.info(f"文章已存在，跳过保存: {article_url}")
         return
+    # 准备在这里调用 lightrag进行知识库构建
+    # TODO
 
     try:
         async with pool.acquire() as conn:

From d2a79f0c939317d335aa977e53c2f3010e290b08 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 09:18:27 +0800
Subject: [PATCH 10/17] 'commit'

---
 dsLightRag/WxGzh/T2_CollectArticle.py | 20 +++++++--
 dsLightRag/WxGzh/T3_TrainIntoKG.py    | 63 ---------------------------
 2 files changed, 17 insertions(+), 66 deletions(-)
 delete mode 100644 dsLightRag/WxGzh/T3_TrainIntoKG.py

diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py
index 26ae2377..0338bae8 100644
--- a/dsLightRag/WxGzh/T2_CollectArticle.py
+++ b/dsLightRag/WxGzh/T2_CollectArticle.py
@@ -27,6 +27,8 @@ import time
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service as ChromeService
 import requests
+
+from Util.LightRagUtil import initialize_pg_rag
 from Util.PostgreSQLUtil import init_postgres_pool
 from Util.WxGzhUtil import init_wechat_browser, get_article_content
 
@@ -72,9 +74,21 @@ async def save_article_to_db(pool, article_title, account_name, article_url, pub
     if await is_article_exist(pool, article_url):
         logger.info(f"文章已存在，跳过保存: {article_url}")
         return
-    # 准备在这里调用 lightrag进行知识库构建
-    # TODO
-
+    # 在这里调用 lightrag进行知识库构建
+    workspace = 'ChangChun'
+    # 使用PG库后，这个是没有用的,但目前的项目代码要求必传，就写一个吧。
+    WORKING_DIR = f"./output"
+    docx_name = f"{account_name}_{article_title}"  # 组合来源和标题作为文档名
+    logger.info(f"开始处理文档: {docx_name}")
+    try:
+        rag = await initialize_pg_rag(WORKING_DIR=WORKING_DIR, workspace=workspace)
+        await rag.ainsert(input=content, file_paths=[docx_name])
+    finally:
+        if rag:
+            await rag.finalize_storages()
+        if pool:
+            await pool.close()
+    logger.info(f"保存文档到知识库成功: {docx_name}")
     try:
         async with pool.acquire() as conn:
             await conn.execute('''
diff --git a/dsLightRag/WxGzh/T3_TrainIntoKG.py b/dsLightRag/WxGzh/T3_TrainIntoKG.py
deleted file mode 100644
index 86473413..00000000
--- a/dsLightRag/WxGzh/T3_TrainIntoKG.py
+++ /dev/null
@@ -1,63 +0,0 @@
-import asyncio
-import logging
-
-from Util.DocxUtil import get_docx_content_by_pandoc
-from Util.LightRagUtil import initialize_pg_rag
-from Util.PostgreSQLUtil import init_postgres_pool
-
-logger = logging.getLogger('lightrag')
-logger.setLevel(logging.INFO)
-handler = logging.StreamHandler()
-handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
-logger.addHandler(handler)
-logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
-
-# 使用PG库后，这个是没有用的,但目前的项目代码要求必传，就写一个吧。
-WORKING_DIR = f"./output"
-
-
-async def get_unprocessed_articles():
-    """从t_wechat_articles表获取未处理的文章"""
-    try:
-        pool = await init_postgres_pool()
-        async with pool.acquire() as conn:
-            rows = await conn.fetch('''
-                SELECT id, source, title, content 
-                FROM t_wechat_articles 
-                WHERE is_finish = 0
-            ''')
-            return [dict(row) for row in rows]
-    finally:
-        await pool.close()
-
-async def main():
-    # 获取未处理的文章
-    articles = await get_unprocessed_articles()
-    logger.info(f"共获取到{len(articles)}篇未处理的文章")
-    
-    for article in articles:
-        workspace = 'ChangChun'
-        docx_name = f"{article['source']}_{article['title']}"  # 组合来源和标题作为文档名
-        content = article["content"]  # 使用文章内容
-        
-        logger.info(f"开始处理文档: {docx_name}")
-        try:
-            rag = await initialize_pg_rag(WORKING_DIR=WORKING_DIR, workspace=workspace)
-            await rag.ainsert(input=content, file_paths=[docx_name])
-            
-            # 标记为已处理
-            pool = await init_postgres_pool()
-            async with pool.acquire() as conn:
-                await conn.execute('''
-                    UPDATE t_wechat_articles 
-                    SET is_finish = 1 
-                    WHERE id = $1
-                ''', article["id"])
-        finally:
-            if rag:
-                await rag.finalize_storages()
-            if pool:
-                await pool.close()
-
-if __name__ == "__main__":
-    asyncio.run(main())

From db0365b16d3844aa5c0e2903e310650ff46250be Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 09:23:30 +0800
Subject: [PATCH 11/17] 'commit'

---
 .../Config/__pycache__/Config.cpython-310.pyc | Bin 888 -> 888 bytes
 .../__pycache__/LightRagUtil.cpython-310.pyc  | Bin 4511 -> 4511 bytes
 .../PostgreSQLUtil.cpython-310.pyc            | Bin 1565 -> 1565 bytes
 3 files changed, 0 insertions(+), 0 deletions(-)

diff --git a/dsLightRag/Config/__pycache__/Config.cpython-310.pyc b/dsLightRag/Config/__pycache__/Config.cpython-310.pyc
index c551f7f6a39e4d9881c562acb9a358f2569fd42f..73bb1c973c7031619f11244f5f7461787f147b36 100644
GIT binary patch
delta 19
Zcmeyt_JfTppO=@50SGcbZRE;h1^_n;1t9<c

delta 19
Zcmeyt_JfTppO=@50SG?2Y~;#g1^_nI1p5F0

diff --git a/dsLightRag/Util/__pycache__/LightRagUtil.cpython-310.pyc b/dsLightRag/Util/__pycache__/LightRagUtil.cpython-310.pyc
index 9bc1e54dac9dbe973c4131d5b80ceb60cf4263d8..d89e8018a6fc1ad3abe25bb0951aa47b9d96253a 100644
GIT binary patch
delta 1003
zcmYL|OK%cU6o8om!<1nL+NOz!u@8bu<D-wn`05*qEiKT}3cWriQ<&1R%nV)zYU4(i
zCb}?pZ#AyEG;wEKx#&O81RIyeKj6ZJiRTPO3Hi9^anIwP!$I^QDq51{cfse+41F(t
zbaR9qxyOLN-ShoKTF$1FmD!>)x4E9o>n_(D7h$WO7hITq_bhOyS<E|eHUhWl89+<e
z)~lMS8X6G{_3_-kt<?!@dM}85=rVv9WQX2y2!HH7Cl7;y_^MW=qET=R@#EYuo979b
zU-OaT5c&&%mbYr{X*A9gvF)rlIf5sGZq{wpENM<CrL0pTfknYul$vVqv_&XL*=kqR
z<9ijjhH-hS*$ry4tk};!`=0R^G2<n+BwXij;_()HE-Z6N_C;9XCs8rQ;@wl+0vnV1
z*pu$_T$;V>ei$XejArjybhntdEvi<uLpPB$V@oTSb+ZB$8HA1cV?#;IDOIRi+fnLm
zPnrf@T%cVTJ;|E>5s3NLKgefLk!45zRc@VyMSJuHI(n!F_0xzM#4KVC5&DN7JSEvD
zaT7`(lOpV=D6^S>%x$o2z!FnvUPLT8eH;gF3(FwJIm)xyo>MF#jc^5KNfns#SLx|h
zG!ikFO5ZtYkM!nJ39FR1Rl8B|=;T}~g`IU|_EH|W;&g#+6S-P76vHZO)%NiE{>wNS
zwZ3nb2>T|7Vfeq~SOMRK_;;I~ha$aYs{{h3?C5CLiQLpm@R%JXiKq?RQVi9EL3EUO
zMRm6EQ7|@%1-0Lh$5Rfmiom3F4dDjF-7T?@T}v-!Qc7ZRjVfr!J8$+QxRg-Qr7M$(
zbMU2Ml%g~aKhP_zG)(?-$uhU~3dOZ*dCi7V+i!K5HR}8uaN>m7p=oINLui6H3+b-g
zlsaqTsp@sR<ul7FrRsjWvURvZHxLCx5zz8!W>cq@NjFhN5O)#x5cd%(4E4cZz`zz%

delta 1003
zcmYL|$xjne7{EJBI~_WmX#q(jgvB%kiixsB5)UpwwXAIc%Nm_b+hIB|OL?6Z8!vJ+
z(S!N!qVdGh#GCQtf&W00C-G?f2RwN5eN(DU^XvPT_pPs|iPMDS%5o&YKG(aCK1iQ<
zj>0)V$@qt#@1^oeE~Bo_ZmV-Uj~;B8fxz1Ug|*;IZUBA+A990`3{4LXvD;*rfr|U4
zr8}CfQ_<F*&NV%~MxhZJldhmk5-|d2p(z&rA~d3m;hnJM8ir1oV_cZx#$aC9W#%_R
z|Mn>Qi|7-w++&@j1S++hRX<1UQZSvGr#U6v52cn(LS<%Au@<$ed6kyP3Q~)$in_wD
zqYD^UAi7s44&<a>_!2%5Zehk5SQ2jw%XnOY7vc(6fUn}Ju!@Q-q&n|%n=mQ&z_ZR#
zZV%pf9wcZit9!>TIo#gxT%sBJ8Bb;1F6!m7=@_gco5DmSd98psWrJwdidt)Vl0BwN
ziKGLg7oZUtXEEPJh6N23ML3VFaR&5B-oz3*21yY0IYb_@hFC|$|Dg*{1^6uOu+k^x
ze)uIRa6hVW705+hX$#Feh+V&ri|B&*n29Nl2r%1q71HuJcL1(zuq}U+pU<F?O8Lz4
z=4E@dJD*9rr43K>>b15`$!9XySzCrz%Fs=}3%_l}vTW6M%evKCUeA9SH>1^>PKm;I
zrH?KDx02k#xzNaAqdh3vU3N=Mz?5wrDWa3o&`WHXZ6)p3>Yl6Gn!^^+R^k=Se~m}6
z<Q*)iHA4YUB}5s4Nr{f&8B#ozwsPysi`k5tUR);)H0=8``x#qGpP)-eHk;zumkgkk
zAXDszD)=zj=5Lf-XWujkJ}Vk>>UOnBJDe)<{|&ft3SRskYxrY)de?uDe9a@opI6W_
sYaR_dWsMN6*{aN7S4ahMfH=emVclt%#C3>;DjVS-TtpRd!j}5vFY3lC1ONa4

diff --git a/dsLightRag/Util/__pycache__/PostgreSQLUtil.cpython-310.pyc b/dsLightRag/Util/__pycache__/PostgreSQLUtil.cpython-310.pyc
index d2b6d3e8586bee33674d56d0c2b637599005cb4a..4e49862db4da18e1a6bf5acd1c4c8757d61962b8 100644
GIT binary patch
delta 49
zcmbQsGna=upO=@50SFoy3pa8LvoJ<YmS@Rgl$qScGMQ0svK(tan+#C4NN)0G)+_-2
CDhkd3

delta 49
zcmbQsGna=upO=@50SHcgE7-^_%))qUvOG%`qwM4^mdT9rljT_Z*<^vTMe>t3vt|JR
DI1dbu


From 3af27fe9ff64cfc6bb28f6ced5fdd31e32b98801 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 09:25:51 +0800
Subject: [PATCH 12/17] 'commit'

---
 dsLightRag/WxGzh/T2_CollectArticle.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py
index 0338bae8..6892c371 100644
--- a/dsLightRag/WxGzh/T2_CollectArticle.py
+++ b/dsLightRag/WxGzh/T2_CollectArticle.py
@@ -161,6 +161,7 @@ if __name__ == '__main__':
 
     # 爬取文章
     for item in gzlist:
+        cnt = 0
         account_name = item["account_name"]
         account_id = item["account_id"]
         id = item["id"]
@@ -205,6 +206,7 @@ if __name__ == '__main__':
         query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data)
         fakeid_list = query_fakeid_response.json().get('app_msg_list')
 
+
         for item in fakeid_list:
             article_url = item.get('link')
             article_title = item.get('title')
@@ -225,10 +227,13 @@ if __name__ == '__main__':
                 pool = loop.run_until_complete(init_postgres_pool())
                 loop.run_until_complete(
                     save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, id))
+                cnt = cnt + 1
             finally:
                 loop.run_until_complete(pool.close())
                 loop.close()
-
+            # 休息1秒，防止频繁访问被封
             time.sleep(1)
+        logger.info(f"成功获取公众号: {account_name} {cnt}篇文章。")
         # 关闭浏览器
     driver.quit()
+

From 6d44aac079a046fc81e21701364ac0509240c965 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 09:44:38 +0800
Subject: [PATCH 13/17] 'commit'

---
 dsLightRag/WxGzh/T2_CollectArticle.py | 222 +++++++++++++-------------
 1 file changed, 112 insertions(+), 110 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py
index 6892c371..f1bd6197 100644
--- a/dsLightRag/WxGzh/T2_CollectArticle.py
+++ b/dsLightRag/WxGzh/T2_CollectArticle.py
@@ -42,6 +42,11 @@ if not logger.handlers:
     handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
     logger.addHandler(handler)
 
+# 添加微信请求头
+header = {
+    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
+    'Referer': 'https://mp.weixin.qq.com/'
+}
 
 async def get_wechat_sources():
     """从t_wechat_source表获取微信公众号列表"""
@@ -101,139 +106,136 @@ async def save_article_to_db(pool, article_title, account_name, article_url, pub
         logging.error(f"保存文章失败: {e}")
 
 
-if __name__ == '__main__':
-    # 从文件cookies.txt中获取
+async def initialize_wechat_session():
+    """初始化微信会话，获取cookies和token"""
     with open('cookies.txt', 'r', encoding='utf-8') as f:
         content = f.read()
-    # 使用json还原为json对象
     cookies = json.loads(content)
-    # 检查是否有过期时间
+    global driver  # 添加这行
     expiry = cookies["expiry"]
     if expiry:
-        # 换算出过期时间
-        expiry_time = time.localtime(expiry)
-        expiry_date = time.strftime("%Y-%m-%d %H:%M:%S", expiry_time)
-
-        # 获取当前时间戳
         current_timestamp = time.time()
-        # 检查是否已过期
         if current_timestamp > expiry:
             logger.error("Cookie已过期")
             exit()
-    # 移除expiry属性
+
     del cookies["expiry"]
-    logger.info(f"cookies的过期时间一般是4天，cookies过期时间：%s" % expiry_date)
-    options = Options()
-    options.add_argument('-headless')  # 无头参数，调试时可以注释掉
-    # 设置headers
-    header = {
-        "HOST": "mp.weixin.qq.com",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)",
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
-        "Accept-Encoding": "gzip, deflate, br",
-        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4",
-        "Connection": "keep-alive"
-    }
 
-    service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
-    # 使用统一的初始化方式
+    options = Options()
+    options.add_argument('-headless')
     driver = init_wechat_browser()
-    # 方法：使用requests库发送请求获取重定向URL
+
     url = 'https://mp.weixin.qq.com'
     response = requests.get(url=url, allow_redirects=False, cookies=cookies)
+
     if 'Location' in response.headers:
         redirect_url = response.headers.get("Location")
-        logger.info(f"重定向URL:%s" % redirect_url)
         token_match = re.findall(r'token=(\d+)', redirect_url)
         if token_match:
             token = token_match[0]
-            logger.info(f"获取到的token:%s" % token)
+            return cookies, token
+
+    return None, None
+
+
+async def get_wechat_account_list(cookies, token, account_name):
+    """获取指定公众号的fakeid"""
+    search_url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
+    query_id = {
+        'action': 'search_biz',
+        'token': token,
+        'lang': 'zh_CN',
+        'f': 'json',
+        'ajax': '1',
+        'random': random.random(),
+        'query': account_name,
+        'begin': '0',
+        'count': '5'
+    }
 
-    article_urls = []
+    search_response = requests.get(search_url, cookies=cookies, headers=header, params=query_id)
+    lists = search_response.json().get('list')[0]
+    return lists.get('fakeid')
+
+
+async def get_article_list(cookies, token, fakeid):
+    """获取公众号文章列表"""
+    appmsg_url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
+    query_id_data = {
+        'token': token,
+        'lang': 'zh_CN',
+        'f': 'json',
+        'ajax': '1',
+        'random': random.random(),
+        'action': 'list_ex',
+        'begin': '0',
+        'count': '5',
+        'query': '',
+        'fakeid': fakeid,
+        'type': '9'
+    }
+
+    query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data)
+    return query_fakeid_response.json().get('app_msg_list')
+
+
+async def process_single_article(article_info, account_info, cookies, token):
+    """处理单篇文章"""
+    article_url = article_info.get('link')
+    article_title = article_info.get('title')
+    publish_time = datetime.datetime.fromtimestamp(int(article_info.get("update_time")))
+
+    if '试卷' in article_title:
+        return False
 
-    # 获取公众号列表
-    loop = asyncio.new_event_loop()
-    asyncio.set_event_loop(loop)
     try:
-        gzlist = loop.run_until_complete(get_wechat_sources())
+        pool = await init_postgres_pool()
+        content = get_article_content(article_url)
+        await save_article_to_db(pool, article_title, account_info["account_name"],
+                                 article_url, publish_time, content, account_info["id"])
+        return True
+    except Exception as e:
+        logger.error(f"处理文章时出错: {e}")
+        return False
     finally:
-        loop.close()
+        if 'pool' in locals():
+            await pool.close()
+
+
+async def process_wechat_account(account_info, cookies, token):
+    """处理单个公众号的所有文章"""
+    cnt = 0
+    fakeid = await get_wechat_account_list(cookies, token, account_info["account_name"])
+    articles = await get_article_list(cookies, token, fakeid)
+
+    for article in articles:
+        success = await process_single_article(article, account_info, cookies, token)
+        if success:
+            cnt += 1
+        time.sleep(1)
+
+    logger.info(f"成功获取公众号: {account_info['account_name']} {cnt}篇文章。")
+    return cnt
+
+
+async def main():
+    """主函数"""
+    cookies, token = await initialize_wechat_session()
+    if not cookies or not token:
+        logger.error("初始化微信会话失败")
+        return
+
+    account_list = await get_wechat_sources()
+    for account in account_list:
+        await process_wechat_account(account, cookies, token)
 
-    # 爬取文章
-    for item in gzlist:
-        cnt = 0
-        account_name = item["account_name"]
-        account_id = item["account_id"]
-        id = item["id"]
-        # 搜索微信公众号的接口地址
-        search_url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
-        # 搜索微信公众号接口需要传入的参数，有三个变量：微信公众号token、随机数random、搜索的微信公众号名字
-        query_id = {
-            'action': 'search_biz',
-            'token': token,
-            'lang': 'zh_CN',
-            'f': 'json',
-            'ajax': '1',
-            'random': random.random(),
-            'query': account_name,
-            'begin': '0',
-            'count': '5'
-        }
-        # 打开搜索微信公众号接口地址，需要传入相关参数信息如：cookies、params、headers
-        search_response = requests.get(search_url, cookies=cookies, headers=header, params=query_id)
-        # 取搜索结果中的第一个公众号
-        lists = search_response.json().get('list')[0]
-        # 获取这个公众号的fakeid，后面爬取公众号文章需要此字段
-        fakeid = lists.get('fakeid')
-        logging.info("fakeid:" + fakeid)
-        # 微信公众号文章接口地址
-        appmsg_url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
-        # 搜索文章需要传入几个参数：登录的公众号token、要爬取文章的公众号fakeid、随机数random
-        query_id_data = {
-            'token': token,
-            'lang': 'zh_CN',
-            'f': 'json',
-            'ajax': '1',
-            'random': random.random(),
-            'action': 'list_ex',
-            'begin': '0',  # 不同页，此参数变化，变化规则为每页加5
-            'count': '5',
-            'query': '',
-            'fakeid': fakeid,
-            'type': '9'
-        }
-        # 打开搜索的微信公众号文章列表页
-        query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data)
-        fakeid_list = query_fakeid_response.json().get('app_msg_list')
-
-
-        for item in fakeid_list:
-            article_url = item.get('link')
-            article_title = item.get('title')
-            publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time")))
-
-            if '试卷' in article_title:  # 过滤掉试卷,致知物理中有大量试卷，我做教育资讯的不关心试卷
-                continue
-
-            logger.info(f"正在处理文章: {article_title} ({publish_time})")
-
-            logger.info(f"正在获取文章: {article_title}内容...")
-            content = get_article_content(article_url)
-            logger.info(f"成功获取文章: {article_title}内容。")
-
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-            try:
-                pool = loop.run_until_complete(init_postgres_pool())
-                loop.run_until_complete(
-                    save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, id))
-                cnt = cnt + 1
-            finally:
-                loop.run_until_complete(pool.close())
-                loop.close()
-            # 休息1秒，防止频繁访问被封
-            time.sleep(1)
-        logger.info(f"成功获取公众号: {account_name} {cnt}篇文章。")
-        # 关闭浏览器
     driver.quit()
 
+
+if __name__ == '__main__':
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        loop.run_until_complete(main())
+    finally:
+        loop.close()

From 54a1ba265f9119375c84c8ce3574f529cb01cfcf Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 09:45:30 +0800
Subject: [PATCH 14/17] 'commit'

---
 dsLightRag/WxGzh/T2_CollectArticle.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py
index f1bd6197..a72455c0 100644
--- a/dsLightRag/WxGzh/T2_CollectArticle.py
+++ b/dsLightRag/WxGzh/T2_CollectArticle.py
@@ -77,7 +77,7 @@ async def is_article_exist(pool, article_url):
 async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, source_id):
     # 先检查文章是否已存在
     if await is_article_exist(pool, article_url):
-        logger.info(f"文章已存在，跳过保存: {article_url}")
+        logger.info(f"文章已存在，跳过保存: {account_name}-{article_title}")
         return
     # 在这里调用 lightrag进行知识库构建
     workspace = 'ChangChun'

From 3fb6bd95b2197ba93c57c2c93d39282535092d05 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 09:59:47 +0800
Subject: [PATCH 15/17] 'commit'

---
 dsLightRag/WxGzh/T2_CollectArticle.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py
index a72455c0..7069a9c1 100644
--- a/dsLightRag/WxGzh/T2_CollectArticle.py
+++ b/dsLightRag/WxGzh/T2_CollectArticle.py
@@ -75,10 +75,7 @@ async def is_article_exist(pool, article_url):
 
 
 async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, source_id):
-    # 先检查文章是否已存在
-    if await is_article_exist(pool, article_url):
-        logger.info(f"文章已存在，跳过保存: {account_name}-{article_title}")
-        return
+
     # 在这里调用 lightrag进行知识库构建
     workspace = 'ChangChun'
     # 使用PG库后，这个是没有用的,但目前的项目代码要求必传，就写一个吧。
@@ -190,6 +187,10 @@ async def process_single_article(article_info, account_info, cookies, token):
 
     try:
         pool = await init_postgres_pool()
+        # 先检查文章是否已存在
+        if await is_article_exist(pool, article_url):
+            logger.info(f'文章已存在，跳过保存: {account_info["account_name"]}-{article_title}')
+            return False
         content = get_article_content(article_url)
         await save_article_to_db(pool, article_title, account_info["account_name"],
                                  article_url, publish_time, content, account_info["id"])

From 3fec770149ea340009d156700099f12b16789c33 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 10:01:21 +0800
Subject: [PATCH 16/17] 'commit'

---
 dsLightRag/WxGzh/T2_CollectArticle.py | 30 ++++++++++++++++++---------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_CollectArticle.py b/dsLightRag/WxGzh/T2_CollectArticle.py
index 7069a9c1..051e82dc 100644
--- a/dsLightRag/WxGzh/T2_CollectArticle.py
+++ b/dsLightRag/WxGzh/T2_CollectArticle.py
@@ -221,16 +221,26 @@ async def process_wechat_account(account_info, cookies, token):
 
 async def main():
     """主函数"""
-    cookies, token = await initialize_wechat_session()
-    if not cookies or not token:
-        logger.error("初始化微信会话失败")
-        return
-
-    account_list = await get_wechat_sources()
-    for account in account_list:
-        await process_wechat_account(account, cookies, token)
-
-    driver.quit()
+    while True:
+        try:
+            logger.info("开始执行微信公众号文章采集任务")
+            cookies, token = await initialize_wechat_session()
+            if not cookies or not token:
+                logger.error("初始化微信会话失败")
+                continue
+
+            account_list = await get_wechat_sources()
+            for account in account_list:
+                await process_wechat_account(account, cookies, token)
+
+            logger.info("本次采集任务完成，等待30分钟后再次执行")
+            await asyncio.sleep(30 * 60)  # 30分钟
+        except Exception as e:
+            logger.error(f"主循环发生错误: {e}")
+            await asyncio.sleep(30 * 60)  # 出错后也等待30分钟
+        finally:
+            if 'driver' in globals():
+                driver.quit()
 
 
 if __name__ == '__main__':

From 1cb0580377e420ce33fc788e0989e22ae4633ea0 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Wed, 16 Jul 2025 10:10:20 +0800
Subject: [PATCH 17/17] 'commit'

---
 dsLightRag/static/ChangChun.html | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/dsLightRag/static/ChangChun.html b/dsLightRag/static/ChangChun.html
index b71919b8..7b628e3a 100644
--- a/dsLightRag/static/ChangChun.html
+++ b/dsLightRag/static/ChangChun.html
@@ -217,6 +217,10 @@
                     <div class="example-item" onclick="fillExample('力旺实验中学今年的中考成绩怎么样？')">
                         力旺实验中学今年的中考成绩怎么样？
                     </div>
+                    <div class="example-item" onclick="fillExample('在730分占比中，哪些学校表现优秀？')">
+                        在730分占比中，哪些学校表现优秀？
+                    </div>
+
                 </div>
             </div>
         </div>