From 8bfd82e6de7ffe432b3674d2ab764e823f841b58 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Mon, 14 Jul 2025 14:32:49 +0800
Subject: [PATCH 01/46] Add TestCrawl.py: Selenium WeChat MP login that captures cookies

---
 dsLightRag/.idea/dsLightRag.iml |  2 +-
 dsLightRag/.idea/misc.xml       |  2 +-
 dsLightRag/Test/TestCrawl.py    | 67 +++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 2 deletions(-)
 create mode 100644 dsLightRag/Test/TestCrawl.py
diff --git a/dsLightRag/.idea/dsLightRag.iml b/dsLightRag/.idea/dsLightRag.iml
index 4ceb6f94..880d61c1 100644
--- a/dsLightRag/.idea/dsLightRag.iml
+++ b/dsLightRag/.idea/dsLightRag.iml
@@ -2,7 +2,7 @@
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
     <content url="file://$MODULE_DIR$" />
-    <orderEntry type="jdk" jdkName="Python 3.10 (4)" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="D:\anaconda3\envs\py310" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyDocumentationSettings">
diff --git a/dsLightRag/.idea/misc.xml b/dsLightRag/.idea/misc.xml
index 0bad5868..0f9b3bc1 100644
--- a/dsLightRag/.idea/misc.xml
+++ b/dsLightRag/.idea/misc.xml
@@ -3,5 +3,5 @@
   <component name="Black">
     <option name="sdkName" value="D:\anaconda3\envs\lightrag" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (4)" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="D:\anaconda3\envs\py310" project-jdk-type="Python SDK" />
 </project>
\ No newline at end of file
diff --git a/dsLightRag/Test/TestCrawl.py b/dsLightRag/Test/TestCrawl.py
new file mode 100644
index 00000000..b9d6ff78
--- /dev/null
+++ b/dsLightRag/Test/TestCrawl.py
@@ -0,0 +1,67 @@
+# 详解（一）Python + Selenium 批量采集微信公众号，搭建自己的微信公众号每日AI简报，告别信息焦虑
+# https://blog.csdn.net/k352733625/article/details/149222945
+import logging
+# 1、安装Firefox软件【最新】
+# https://www.firefox.com.cn/download/#product-desktop-release
+
+# 2、下载geckodriver驱动【最新】
+# https://splinter-docs-zh-cn.readthedocs.io/zh/latest/drivers/firefox.html
+# https://github.com/mozilla/geckodriver/releases
+
+"""
+# 查看selenium版本
+pip show selenium
+4.34.2
+
+# 查看Chrome浏览器版本
+chrome://version/
+138.0.7204.101 (正式版本) （64 位）
+
+# 下载驱动包
+https://googlechromelabs.github.io/chrome-for-testing/
+https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
+"""
+import time, random, re, json, requests
+from selenium import webdriver
+from selenium.webdriver import Chrome
+from selenium.webdriver.firefox.options import Options
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service as ChromeService
+import requests
+import json
+import datetime
+
+
+def weChat_login():
+    # 定义一个空的字典，存放cookies内容
+    post = {}
+    # 用webdriver启动谷歌浏览器
+    logging.info("启动浏览器，打开微信公众号登录界面")
+    options = Options()
+    options.add_argument('-headless')  # 无头参数
+    service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
+    driver = webdriver.Chrome(service=service)
+    # 打开微信公众号登录页面
+    driver.get('https://mp.weixin.qq.com/')
+    # 等待5秒钟
+    time.sleep(2)
+    # # 拿手机扫二维码！
+    logging.info("请拿手机扫码二维码登录公众号")
+    time.sleep(20)
+    # 重新载入公众号登录页，登录之后会显示公众号后台首页，从这个返回内容中获取cookies信息
+    driver.get('https://mp.weixin.qq.com/')
+    # 获取cookies
+    cookie_items = driver.get_cookies()
+    # 获取到的cookies是列表形式，将cookies转成json形式并存入本地名为cookie的文本中
+    for cookie_item in cookie_items:
+        post[cookie_item['name']] = cookie_item['value']
+
+    if "slave_sid" not in post:
+        logging.info("登录公众号失败，获取cookie失败")
+        return None
+    cookie_str = json.dumps(post)
+    return cookie_str
+
+if __name__ == '__main__':
+    cookie_str = weChat_login()
+    print(cookie_str)
\ No newline at end of file

From 397b04baaf0de1909c01afa3935199ef50830113 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Mon, 14 Jul 2025 14:46:57 +0800
Subject: [PATCH 02/46] TestCrawl: inline login into __main__, use Chrome Options, drop unused imports

---
 dsLightRag/Test/TestCrawl.py | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/dsLightRag/Test/TestCrawl.py b/dsLightRag/Test/TestCrawl.py
index b9d6ff78..8fcde632 100644
--- a/dsLightRag/Test/TestCrawl.py
+++ b/dsLightRag/Test/TestCrawl.py
@@ -1,6 +1,10 @@
 # 详解（一）Python + Selenium 批量采集微信公众号，搭建自己的微信公众号每日AI简报，告别信息焦虑
 # https://blog.csdn.net/k352733625/article/details/149222945
 import logging
+import re
+
+import requests
+
 # 1、安装Firefox软件【最新】
 # https://www.firefox.com.cn/download/#product-desktop-release
 
@@ -21,18 +25,16 @@ chrome://version/
 https://googlechromelabs.github.io/chrome-for-testing/
 https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
 """
-import time, random, re, json, requests
+import time
 from selenium import webdriver
-from selenium.webdriver import Chrome
-from selenium.webdriver.firefox.options import Options
-from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service as ChromeService
-import requests
 import json
-import datetime
 
 
-def weChat_login():
+
+
+if __name__ == '__main__':
     # 定义一个空的字典，存放cookies内容
     post = {}
     # 用webdriver启动谷歌浏览器
@@ -58,10 +60,13 @@ def weChat_login():
 
     if "slave_sid" not in post:
         logging.info("登录公众号失败，获取cookie失败")
-        return None
-    cookie_str = json.dumps(post)
-    return cookie_str
+        exit()
+    cookies = json.dumps(post)
+    print(cookies)
 
-if __name__ == '__main__':
-    cookie_str = weChat_login()
-    print(cookie_str)
\ No newline at end of file
+    print(driver.current_url)
+
+    #url = 'https://mp.weixin.qq.com'
+    #response = requests.get(url=url, allow_redirects=False, cookies=cookies)
+    #token = re.findall(r'token=(\d+)', str(response.headers.get("Location")))[0]
+    #logging.info("微信token:" + token)

From 351caa4b5bfa863475cbe58956ca32d665a1d478 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Mon, 14 Jul 2025 14:51:43 +0800
Subject: [PATCH 03/46] TestCrawl: extract token from the redirect Location header via requests

---
 dsLightRag/Test/TestCrawl.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/dsLightRag/Test/TestCrawl.py b/dsLightRag/Test/TestCrawl.py
index 8fcde632..ba3d7b66 100644
--- a/dsLightRag/Test/TestCrawl.py
+++ b/dsLightRag/Test/TestCrawl.py
@@ -62,9 +62,18 @@ if __name__ == '__main__':
         logging.info("登录公众号失败，获取cookie失败")
         exit()
     cookies = json.dumps(post)
-    print(cookies)
 
-    print(driver.current_url)
+    # 方法3：使用requests库发送请求获取重定向URL
+    url = 'https://mp.weixin.qq.com'
+    response = requests.get(url=url, allow_redirects=False, cookies=post)
+    if 'Location' in response.headers:
+        redirect_url = response.headers.get("Location")
+        print("重定向URL:", redirect_url)
+        token_match = re.findall(r'token=(\d+)', redirect_url)
+        if token_match:
+            token = token_match[0]
+            print("获取到的token:", token)
+            logging.info("微信token:" + token)
 
     #url = 'https://mp.weixin.qq.com'
     #response = requests.get(url=url, allow_redirects=False, cookies=cookies)

From be105894af0613741967ec2eb153cce1ef09e82f Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Mon, 14 Jul 2025 15:07:59 +0800
Subject: [PATCH 04/46] TestCrawl: look up account fakeid and fetch its latest article link

---
 dsLightRag/Test/TestCrawl.py | 94 +++++++++++++++++++++++++++++++-----
 1 file changed, 82 insertions(+), 12 deletions(-)

diff --git a/dsLightRag/Test/TestCrawl.py b/dsLightRag/Test/TestCrawl.py
index ba3d7b66..95e70278 100644
--- a/dsLightRag/Test/TestCrawl.py
+++ b/dsLightRag/Test/TestCrawl.py
@@ -1,6 +1,11 @@
 # 详解（一）Python + Selenium 批量采集微信公众号，搭建自己的微信公众号每日AI简报，告别信息焦虑
 # https://blog.csdn.net/k352733625/article/details/149222945
+
+# 微信爬爬猫---公众号文章抓取代码分析
+# https://blog.csdn.net/yajuanpi4899/article/details/121584268
+import datetime
 import logging
+import random
 import re
 
 import requests
@@ -12,6 +17,10 @@ import requests
 # https://splinter-docs-zh-cn.readthedocs.io/zh/latest/drivers/firefox.html
 # https://github.com/mozilla/geckodriver/releases
 
+# 3、Python爬虫实战系列：微信公众号文章爬取的5种技术方案总结及代码示例！
+# 方案5：微信公众号后台引用链接方式爬取
+# https://blog.csdn.net/Python_trys/article/details/146506009
+
 """
 # 查看selenium版本
 pip show selenium
@@ -31,12 +40,14 @@ from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service as ChromeService
 import json
 
-
-
-
 if __name__ == '__main__':
     # 定义一个空的字典，存放cookies内容
-    post = {}
+    cookies = {}
+    # 设置headers
+    header = {
+        "HOST": "mp.weixin.qq.com",
+        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0"
+    }
     # 用webdriver启动谷歌浏览器
     logging.info("启动浏览器，打开微信公众号登录界面")
     options = Options()
@@ -50,22 +61,23 @@ if __name__ == '__main__':
     # # 拿手机扫二维码！
     logging.info("请拿手机扫码二维码登录公众号")
     time.sleep(20)
+
     # 重新载入公众号登录页，登录之后会显示公众号后台首页，从这个返回内容中获取cookies信息
     driver.get('https://mp.weixin.qq.com/')
     # 获取cookies
     cookie_items = driver.get_cookies()
     # 获取到的cookies是列表形式，将cookies转成json形式并存入本地名为cookie的文本中
     for cookie_item in cookie_items:
-        post[cookie_item['name']] = cookie_item['value']
+        cookies[cookie_item['name']] = cookie_item['value']
 
-    if "slave_sid" not in post:
+    if "slave_sid" not in cookies:
         logging.info("登录公众号失败，获取cookie失败")
         exit()
-    cookies = json.dumps(post)
+    # cookies = json.dumps(post)  # 注释掉这一行
 
     # 方法3：使用requests库发送请求获取重定向URL
     url = 'https://mp.weixin.qq.com'
-    response = requests.get(url=url, allow_redirects=False, cookies=post)
+    response = requests.get(url=url, allow_redirects=False, cookies=cookies)
     if 'Location' in response.headers:
         redirect_url = response.headers.get("Location")
         print("重定向URL:", redirect_url)
@@ -75,7 +87,65 @@ if __name__ == '__main__':
             print("获取到的token:", token)
             logging.info("微信token:" + token)
 
-    #url = 'https://mp.weixin.qq.com'
-    #response = requests.get(url=url, allow_redirects=False, cookies=cookies)
-    #token = re.findall(r'token=(\d+)', str(response.headers.get("Location")))[0]
-    #logging.info("微信token:" + token)
+    article_urls = []
+    gzlist = [{"account_name": "长春教育八卦阵", "account_id": "jybg100"}]
+    for item in gzlist:
+        account_name = item["account_name"]
+        account_id = item["account_id"]
+        # 搜索微信公众号的接口地址
+        search_url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
+        # 搜索微信公众号接口需要传入的参数，有三个变量：微信公众号token、随机数random、搜索的微信公众号名字
+        query_id = {
+            'action': 'search_biz',
+            'token': token,
+            'lang': 'zh_CN',
+            'f': 'json',
+            'ajax': '1',
+            'random': random.random(),
+            'query': account_name,
+            'begin': '0',
+            'count': '5'
+        }
+        # 打开搜索微信公众号接口地址，需要传入相关参数信息如：cookies、params、headers
+        search_response = requests.get(search_url, cookies=cookies, headers=header, params=query_id)
+        # 取搜索结果中的第一个公众号
+        lists = search_response.json().get('list')[0]
+        # 获取这个公众号的fakeid，后面爬取公众号文章需要此字段
+        fakeid = lists.get('fakeid')
+        logging.info("fakeid:" + fakeid)
+        # 微信公众号文章接口地址
+        appmsg_url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
+        # 搜索文章需要传入几个参数：登录的公众号token、要爬取文章的公众号fakeid、随机数random
+        query_id_data = {
+            'token': token,
+            'lang': 'zh_CN',
+            'f': 'json',
+            'ajax': '1',
+            'random': random.random(),
+            'action': 'list_ex',
+            'begin': '0',  # 不同页，此参数变化，变化规则为每页加5
+            'count': '5',
+            'query': '',
+            'fakeid': fakeid,
+            'type': '9'
+        }
+        # 打开搜索的微信公众号文章列表页
+        query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data)
+        fakeid_list = query_fakeid_response.json().get('app_msg_list')
+        item = fakeid_list[0]
+        # 采集item示例
+        new_article = {
+            'title': item.get('title'),
+            'article_url': item.get('link'),
+            'account_id': account_id,
+            'account_name': account_name,
+            'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S'),
+            'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        }
+        logging.info("new_article:", new_article)
+        article_urls.append(item.get('link'))
+        time.sleep(2)
+
+    for article_url in article_urls:
+        print("正在爬取文章：" + article_url)
+

From 9ed09f13d6cd12662def6bf3a3c4ff3af0a6c9df Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Mon, 14 Jul 2025 15:19:13 +0800
Subject: [PATCH 05/46] TestCrawl: collect every article in app_msg_list instead of only the first

---
 dsLightRag/Test/TestCrawl.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/dsLightRag/Test/TestCrawl.py b/dsLightRag/Test/TestCrawl.py
index 95e70278..7a9a4e6d 100644
--- a/dsLightRag/Test/TestCrawl.py
+++ b/dsLightRag/Test/TestCrawl.py
@@ -132,19 +132,20 @@ if __name__ == '__main__':
         # 打开搜索的微信公众号文章列表页
         query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data)
         fakeid_list = query_fakeid_response.json().get('app_msg_list')
-        item = fakeid_list[0]
-        # 采集item示例
-        new_article = {
-            'title': item.get('title'),
-            'article_url': item.get('link'),
-            'account_id': account_id,
-            'account_name': account_name,
-            'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S'),
-            'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-        }
-        logging.info("new_article:", new_article)
-        article_urls.append(item.get('link'))
-        time.sleep(2)
+
+        for item in fakeid_list:
+            # 采集item示例
+            new_article = {
+                'title': item.get('title'),
+                'article_url': item.get('link'),
+                'account_id': account_id,
+                'account_name': account_name,
+                'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S'),
+                'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+            }
+            logging.info("new_article:", new_article)
+            article_urls.append(item.get('link'))
+            time.sleep(1)
 
     for article_url in article_urls:
         print("正在爬取文章：" + article_url)

From 69e8e833e6c6299d75cb0312118cb5b0f4782dac Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Mon, 14 Jul 2025 15:41:30 +0800
Subject: [PATCH 06/46] 'commit'

---
 .../Test/Test/Logs/article_bfc50bb7d7.html    | 162 ++++++++++++++++++
 dsLightRag/Test/TestCrawl.py                  | 148 ++++++++++++++--
 2 files changed, 295 insertions(+), 15 deletions(-)
 create mode 100644 dsLightRag/Test/Test/Logs/article_bfc50bb7d7.html

diff --git a/dsLightRag/Test/Test/Logs/article_bfc50bb7d7.html b/dsLightRag/Test/Test/Logs/article_bfc50bb7d7.html
new file mode 100644
index 00000000..cd460649
--- /dev/null
+++ b/dsLightRag/Test/Test/Logs/article_bfc50bb7d7.html
@@ -0,0 +1,162 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta http-equiv=Content-Type content="text/html;charset=utf-8">
+<meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,user-scalable=0,viewport-fit=cover">
+<meta name="color-scheme" content="light dark">
+<meta name="apple-mobile-web-app-capable" content="yes">
+<meta name="apple-mobile-web-app-status-bar-style" content="black">
+<meta name="format-detection" content="telephone=no">
+<link rel="shortcut icon" type="image/x-icon" href="//res.wx.qq.com/a/wx_fed/assets/res/NTI4MWU5.ico">
+<link rel="mask-icon" href="//res.wx.qq.com/a/wx_fed/assets/res/MjliNWVm.svg" color="#4C4C4C">
+<link rel="apple-touch-icon-precomposed" href="//res.wx.qq.com/a/wx_fed/assets/res/OTE0YTAw.png">
+
+        <title></title>
+        <script>
+        (() => {
+            
+            const ua = navigator.userAgent;
+            const noMobile = !(/(iPhone|iPad|iPod|iOS)/i.test(ua) || /Windows\sPhone/i.test(ua) || /(Android)/i.test(ua));
+            setTimeout(() => {
+                noMobile && document.title === '' && (document.title = '微信公众平台');
+            }, 1000);
+        })();
+        </script>
+        
+<link rel="stylesheet" type="text/css" href="//res.wx.qq.com/t/wx_fed/weui-source/res/2.5.14/weui.min.css">
+
+<link rel="stylesheet" href="//res.wx.qq.com/mmbizwap/zh_CN/htmledition/style/page/secitptpage/verify7729a9.css" media="all">
+<script type="text/javascript">
+  
+</script>
+
+    </head>
+    <body class="zh_CN " ontouchstart="">
+         
+<div class="weui-msg">
+  <div id="tips" style="display:none;" class="top_tips warning"></div>
+      </div>
+
+<script type="text/javascript">
+var PAGE_MID='mmbizwap:secitptpage/verify.html';
+</script>
+        
+        <style>body,.wx-root{--weui-BG-0:#ededed;--weui-BG-1:#f7f7f7;--weui-BG-2:#fff;--weui-BG-3:#f7f7f7;--weui-BG-4:#4c4c4c;--weui-BG-5:#fff;--weui-FG-0:rgba(0,0,0,0.9);--weui-FG-HALF:rgba(0,0,0,0.9);--weui-FG-1:rgba(0,0,0,0.5);--weui-FG-2:rgba(0,0,0,0.3);--weui-FG-3:rgba(0,0,0,0.1);--weui-FG-4:rgba(0,0,0,0.15);--weui-FG-5:rgba(0,0,0,0.05);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#fa9d3b;--weui-YELLOW:#ffc300;--weui-GREEN:#91d300;--weui-LIGHTGREEN:#95ec69;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1485ee;--weui-PURPLE:#6467f0;--weui-WHITE:#fff;--weui-LINK:#576b95;--weui-TEXTGREEN:#06ae56;--weui-FG:#000;--weui-BG:#fff;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:#fa9d3b;--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:#06ae56;--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:#10aeff;--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(0,0,0,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(0,0,0,0.05)}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-BG-0:#111;--weui-BG-1:#1e1e1e;--weui-BG-2:#191919;--weui-BG-3:#202020;--weui-BG-4:#404040;--weui-BG-5:#2c2c2c;--weui-FG-0:rgba(255,255,255,0.8);--weui-FG-HALF:rgba(255,255,255,0.6);--weui-FG-1:rgba(255,255,255,0.5);--weui-FG-2:rgba(255,255,255,0.3);--weui-FG-3:rgba(255,255,255,0.1);--weui-FG-4:rgba(255,255,255,0.15);--weui-FG-5:rgba(255,255,255,0.1);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#c87d2f;--weui-YELLOW:#cc9c00;--weui-GREEN:#74a800;--weui-LIGHTGREEN:#3eb575;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1196ff;--weui-PURPLE:#8183ff;--weui-WHITE:rgba(255,255,255,0.8);--weui-LINK:#7d90a9;--weui-TEXTGREEN:#259c5c;--weui-FG:#fff;--weui-BG:#000;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:rgba(250,157,59,0.6);--weui-TA
G-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:rgba(6,174,86,0.6);--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:rgba(16,174,255,0.6);--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(255,255,255,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(255,255,255,0.05)}}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-BG-0:#111;--weui-BG-1:#1e1e1e;--weui-BG-2:#191919;--weui-BG-3:#202020;--weui-BG-4:#404040;--weui-BG-5:#2c2c2c;--weui-FG-0:rgba(255,255,255,0.8);--weui-FG-HALF:rgba(255,255,255,0.6);--weui-FG-1:rgba(255,255,255,0.5);--weui-FG-2:rgba(255,255,255,0.3);--weui-FG-3:rgba(255,255,255,0.1);--weui-FG-4:rgba(255,255,255,0.15);--weui-FG-5:rgba(255,255,255,0.1);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#c87d2f;--weui-YELLOW:#cc9c00;--weui-GREEN:#74a800;--weui-LIGHTGREEN:#3eb575;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1196ff;--weui-PURPLE:#8183ff;--weui-WHITE:rgba(255,255,255,0.8);--weui-LINK:#7d90a9;--weui-TEXTGREEN:#259c5c;--weui-FG:#fff;--weui-BG:#000;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:rgba(250,157,59,0.6);--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:rgba(6,174,86,0.6);--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:rgba(16,174,255,0.6);--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(255,255,255,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(255,255,255,0.05)}.wx-root[data-weui-mode='care'],body[data-weui-mode='care']{--weui-BG-0:#ededed;--weui-BG-1:#f7f7f7;--weui-BG-2:#fff;--weui-BG-3:#f7f7f7;--weui-BG-4:#4c4c4c;--weui-BG-5:#fff;--weui-FG-0:#000;--weui-FG-HALF:#000;--weui-FG-1:rgba(0,0,0,0.6);--weui-FG-2:rgba(0,0,0,0.42);--weui-FG-3:rgba(0,0,0,0.1);--weui-FG-4:rgba(0,0,0,0.15);--weui-FG-5:rgba(0,0,0,0.05);--weui-RED:#dc3636;--weui-REDORANGE:#ff6146;--weui-ORANGE:#e17719;--weui-YELLOW:#bb8e00;--weui-GREEN:#4f8400;--weui-LIGHTGREEN:
#2e8800;--weui-BRAND:#018942;--weui-BLUE:#007dbb;--weui-INDIGO:#0075e2;--weui-PURPLE:#6265f1;--weui-WHITE:#fff;--weui-LINK:#576b95;--weui-TEXTGREEN:#06ae56;--weui-FG:#000;--weui-BG:#fff;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:#e17719;--weui-TAG-BACKGROUND-ORANGE:rgba(225,119,25,0.1);--weui-TAG-TEXT-GREEN:#06ae56;--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:#007dbb;--weui-TAG-BACKGROUND-BLUE:rgba(0,125,187,0.1);--weui-TAG-TEXT-BLACK:rgba(0,0,0,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(0,0,0,0.05)}@media(prefers-color-scheme:dark){.wx-root[data-weui-mode='care']:not([data-weui-theme='light']),body[data-weui-mode='care']:not([data-weui-theme='light']){--weui-BG-0:#111;--weui-BG-1:#1e1e1e;--weui-BG-2:#191919;--weui-BG-3:#202020;--weui-BG-4:#404040;--weui-BG-5:#2c2c2c;--weui-FG-0:rgba(255,255,255,0.85);--weui-FG-HALF:rgba(255,255,255,0.65);--weui-FG-1:rgba(255,255,255,0.55);--weui-FG-2:rgba(255,255,255,0.35);--weui-FG-3:rgba(255,255,255,0.1);--weui-FG-4:rgba(255,255,255,0.15);--weui-FG-5:rgba(255,255,255,0.1);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#c87d2f;--weui-YELLOW:#cc9c00;--weui-GREEN:#74a800;--weui-LIGHTGREEN:#3eb575;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1196ff;--weui-PURPLE:#8183ff;--weui-WHITE:rgba(255,255,255,0.8);--weui-LINK:#7d90a9;--weui-TEXTGREEN:#259c5c;--weui-FG:#fff;--weui-BG:#000;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:rgba(250,157,59,0.6);--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:rgba(6,174,86,0.6);--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:rgba(16,174,255,0.6);--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(255,255,255,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(255,255,255,0.05)}}.wx-root[data-weui-mode='care'][data-weui-theme='dark'],body[data-weui-mode='care'][data-weui-theme='dark']{
--weui-BG-0:#111;--weui-BG-1:#1e1e1e;--weui-BG-2:#191919;--weui-BG-3:#202020;--weui-BG-4:#404040;--weui-BG-5:#2c2c2c;--weui-FG-0:rgba(255,255,255,0.85);--weui-FG-HALF:rgba(255,255,255,0.65);--weui-FG-1:rgba(255,255,255,0.55);--weui-FG-2:rgba(255,255,255,0.35);--weui-FG-3:rgba(255,255,255,0.1);--weui-FG-4:rgba(255,255,255,0.15);--weui-FG-5:rgba(255,255,255,0.1);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#c87d2f;--weui-YELLOW:#cc9c00;--weui-GREEN:#74a800;--weui-LIGHTGREEN:#3eb575;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1196ff;--weui-PURPLE:#8183ff;--weui-WHITE:rgba(255,255,255,0.8);--weui-LINK:#7d90a9;--weui-TEXTGREEN:#259c5c;--weui-FG:#fff;--weui-BG:#000;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:rgba(250,157,59,0.6);--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:rgba(6,174,86,0.6);--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:rgba(16,174,255,0.6);--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(255,255,255,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(255,255,255,0.05)}.wx-root,body{--weui-BG-COLOR-ACTIVE:#ececec}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-BG-COLOR-ACTIVE:#373737}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-BG-COLOR-ACTIVE:#373737}}body,.wx-root,page{--weui-BTN-HEIGHT:48;--weui-BTN-HEIGHT-MEDIUM:40;--weui-BTN-HEIGHT-SMALL:32}.wx-root,body{--weui-BTN-ACTIVE-MASK:rgba(0,0,0,0.1)}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-BTN-ACTIVE-MASK:rgba(255,255,255,0.1)}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-BTN-ACTIVE-MASK:rgba(255,255,255,0.1)}}.wx-root,body{--weui-BTN-DEFAULT-ACTIVE-BG:#e6e6e6}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-BTN-DEFAULT-ACTIVE-BG:rgba(255,255,255,0.12
6)}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-BTN-DEFAULT-ACTIVE-BG:rgba(255,255,255,0.126)}}.wx-root,body{--weui-DIALOG-LINE-COLOR:rgba(0,0,0,0.1)}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-DIALOG-LINE-COLOR:rgba(255,255,255,0.1)}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-DIALOG-LINE-COLOR:rgba(255,255,255,0.1)}}.weui-hidden_abs{opacity:0;position:absolute;width:1px;height:1px;overflow:hidden}.weui-a11y_ref{display:none}.weui-hidden-space:empty:before{content:"\00A0";position:absolute;width:1px;height:1px;overflow:hidden}.weui-a11y-combo{position:relative}.weui-a11y-combo__helper{opacity:0;position:absolute;width:100%;height:100%;overflow:hidden}.weui-a11y-combo__content{position:relative;z-index:1}.weui-wa-hotarea-el{position:absolute;top:50%;left:50%;-webkit-transform:translate(-50%,-50%);transform:translate(-50%,-50%);min-width:44px;min-height:44px;width:100%;height:100%}.weui-wa-hotarea-el__wrp,.weui-wa-hotarea,.weui-wa-hotarea_before{position:relative}.weui-wa-hotarea-el__wrp a,.weui-wa-hotarea a,.weui-wa-hotarea_before a,.weui-wa-hotarea-el__wrp button,.weui-wa-hotarea button,.weui-wa-hotarea_before button,.weui-wa-hotarea-el__wrp navigator,.weui-wa-hotarea navigator,.weui-wa-hotarea_before navigator{position:relative;z-index:1}.weui-wa-hotarea_before:before,.weui-wa-hotarea:after{content:"";pointer-events:auto;position:absolute;top:50%;left:50%;-webkit-transform:translate(-50%,-50%);transform:translate(-50%,-50%);min-width:44px;min-height:44px;width:100%;height:100%}.test{color:red}body .weui-hidden_abs,body .aria_hidden_abs{position:absolute;opacity:0;overflow:hidden;left:auto;width:1px;height:1px}body .weui-a11y_ref{display:none}body .weui-wa-hotarea_before:before,body .weui-wa-hotarea:after{pointer-events:auto}body .weui-wa-hotarea-el__wrp button{position:relative;z-index:1}</style>
+<script nonce="" type="text/javascript" src="//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/lib/wa-helper.min7729a9.js"></script>
+<script nonce="" type="text/javascript">
+  
+</script>
+
+
+
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_colon">：</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_comma">，</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_period">。</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_space">&nbsp;</span>
+
+
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_type_video">视频</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_type_weapp">小程序</span>
+
+
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_zan_btn_txt">赞</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_zan_btn_tips">，轻点两下取消赞</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_like_btn_txt">在看</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_like_btn_tips">，轻点两下取消在看</span>
+
+        <script nonce="">
+	var __DEBUGINFO = {
+		debug_js: "//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/debug/console7729a9.js",
+		safe_js: "//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/safe/moonsafe7729a9.js",
+		res_list: []
+	};
+</script>
+
+<script nonce="" type="text/javascript">
+	(function () {
+		var totalCount = 0,
+			finishCount = 0;
+
+		function _loadVConsolePlugin() {
+			window.vConsole = new window.VConsole();
+			while (window.vConsolePlugins.length > 0) {
+				var p = window.vConsolePlugins.shift();
+				window.vConsole.addPlugin(p);
+			}
+			
+			if (!window.vConsole.isInited) {
+				window.vConsole._render();
+				window.vConsole._mockTap();
+				window.vConsole._bindEvent();
+				window.vConsole._autoRun();
+			}
+		}
+
+		function _addScript(uri, cb) {
+			totalCount++;
+			var node = document.createElement('SCRIPT');
+			node.type = 'text/javascript';
+			node.src = uri;
+			node.setAttribute('nonce', '');
+			if (cb) {
+				node.onload = cb;
+			}
+			document.getElementsByTagName('head')[0].appendChild(node);
+		}
+		if (
+			(document.cookie && document.cookie.indexOf('vconsole_open=1') > -1)
+			|| location.href.indexOf('vconsole=1') > -1
+		) {
+			window.vConsolePlugins = [];
+			_addScript('//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/vconsole/3.2.2/vconsole.min7729a7.js', function () {
+				
+				_addScript('//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/vconsole/plugin/vconsole-mpopt/1.0.1/vconsole-mpopt7729a7.js', _loadVConsolePlugin);
+			});
+		}
+		if (document.cookie && document.cookie.indexOf('__xweb_remote_debug_device_token__') > -1) {
+			_addScript('https://mp.weixin.qq.com/mmbizappmsg/zh_CN/htmledition/js/scripts/mprdev-0.2.5.js', function () {
+				_addScript('https://mp.weixin.qq.com/mmbizappmsg/zh_CN/htmledition/js/scripts/xwebrd-0.0.2.js');
+			});
+		}
+
+		
+		try {
+			var adIframeUrl = localStorage.getItem('__WXLS_ad_iframe_url');
+			if (window === top) {
+				if (adIframeUrl) {
+					if (navigator.userAgent.indexOf('iPhone') > -1) {
+						var img = new Image();
+						img.src = adIframeUrl;
+					} else {
+						var link = document.createElement('link');
+						link.rel = 'prefetch';
+						link.href = adIframeUrl;
+						document.getElementsByTagName('head')[0].appendChild(link);
+					}
+				}
+			}
+		} catch (err) {
+
+		}
+
+	})();
+</script>
+        <script>window.__moon_host = 'res.wx.qq.com';window.moon_map = {"biz_wap/utils/ajax_wx.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/utils/ajax_wx7729a9.js","biz_common/utils/respTypes.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_common/utils/respTypes7729a9.js","biz_common/utils/url/parse.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_common/utils/url/parse7729a9.js","biz_common/utils/string/html.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_common/utils/string/html7729a9.js","common/color/dark.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/common/color/dark7729a9.js","common/color/light.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/common/color/light7729a9.js","biz_wap/utils/mmversion.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/utils/mmversion7729a9.js","biz_wap/jsapi/mpapp_core.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/jsapi/mpapp_core7729a9.js","complain/tips.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/complain/tips7729a9.js","biz_common/dom/event.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_common/dom/event7729a9.js","biz_wap/utils/ajax.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/utils/ajax7729a9.js","common/color/background_color.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/common/color/background_color7729a9.js","biz_wap/jsapi/core.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/jsapi/core7729a9.js","secitptpage/template/verify.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/secitptpage/template/verify7729a9.js"};</script><script type="text/javascript">window.__wxgspeeds={}; window.__wxgspeeds.moonloadtime=+new Date()</script><script  type="text/javascript" src="//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/moon7729a9.js"></script>
+<script type="text/javascript" src="https://captcha.gtimg.com/TCaptcha.js"></script>
+<script type="text/javascript">
+window.cgiData = {
+    register_code : "0" * 1,
+    target_url : "https://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==\x26amp;mid=2651526157\x26amp;idx=1\x26amp;sn=bfc50bb7d76462dfe1dd412fceb73b97\x26amp;chksm=84e1ab9fb39622894678cdbb13c5262262a0469cb1cf4326ac1f13b66af9a2e0d562f182174a\x26amp;nwr_flag=1",
+    cap_appid : "2003810213",
+    cap_sid : "4508590775431679609",
+    poc_sid : "HM2zdGijbOCHWQ9Y4bEnUfRRdmr12wCGRZr2tawo",
+    timeout_ms : "30000" * 1,
+    poc_token : "HM2zdGijv176ir9lsab2gSXz7Gxxv32lrfdB-Bxd",
+}
+console.log('window.cgiData', window.cgiData);
+seajs.use('secitptpage/template/verify.js')
+</script>
+
+    </body>
+</html>
+
diff --git a/dsLightRag/Test/TestCrawl.py b/dsLightRag/Test/TestCrawl.py
index 7a9a4e6d..63d2eb14 100644
--- a/dsLightRag/Test/TestCrawl.py
+++ b/dsLightRag/Test/TestCrawl.py
@@ -7,20 +7,10 @@ import datetime
 import logging
 import random
 import re
+import os
 
 import requests
 
-# 1、安装Firefox软件【最新】
-# https://www.firefox.com.cn/download/#product-desktop-release
-
-# 2、下载geckodriver驱动【最新】
-# https://splinter-docs-zh-cn.readthedocs.io/zh/latest/drivers/firefox.html
-# https://github.com/mozilla/geckodriver/releases
-
-# 3、Python爬虫实战系列：微信公众号文章爬取的5种技术方案总结及代码示例！
-# 方案5：微信公众号后台引用链接方式爬取
-# https://blog.csdn.net/Python_trys/article/details/146506009
-
 """
 # 查看selenium版本
 pip show selenium
@@ -43,17 +33,25 @@ import json
 if __name__ == '__main__':
     # 定义一个空的字典，存放cookies内容
     cookies = {}
-    # 设置headers
+    # 设置headers - 使用微信内置浏览器的User-Agent
     header = {
         "HOST": "mp.weixin.qq.com",
-        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0"
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+        "Accept-Encoding": "gzip, deflate, br",
+        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4",
+        "Connection": "keep-alive"
     }
     # 用webdriver启动谷歌浏览器
     logging.info("启动浏览器，打开微信公众号登录界面")
     options = Options()
-    options.add_argument('-headless')  # 无头参数
+    # options.add_argument('-headless')  # 无头参数，调试时可以注释掉
+    
+    # 设置微信内置浏览器的User-Agent
+    options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)')
+    
     service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
-    driver = webdriver.Chrome(service=service)
+    driver = webdriver.Chrome(service=service, options=options)
     # 打开微信公众号登录页面
     driver.get('https://mp.weixin.qq.com/')
     # 等待5秒钟
@@ -143,10 +141,130 @@ if __name__ == '__main__':
                 'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S'),
                 'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
             }
+            print("new_article:", new_article)
             logging.info("new_article:", new_article)
             article_urls.append(item.get('link'))
             time.sleep(1)
 
+    # 确保Logs目录存在
+    logs_dir = "./Test/Logs"
+    if not os.path.exists(logs_dir):
+        os.makedirs(logs_dir)
+
     for article_url in article_urls:
         print("正在爬取文章：" + article_url)
+        try:
+            # 使用requests直接获取文章内容，模拟微信环境
+            wechat_headers = {
+                "User-Agent": "Mozilla/5.0 (Linux; Android 10; MI 8 Build/QKQ1.190828.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/83.0.4103.101 Mobile Safari/537.36 XWEB/1768 MMWEBSDK/20210302 MMWEBID/6253 MicroMessenger/8.0.2.1860(0x28000234) Process/toolsmp WeChat/arm64 Weixin NetType/WIFI Language/zh_CN ABI/arm64",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+                "Accept-Encoding": "gzip, deflate",
+                "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
+                "X-Requested-With": "com.tencent.mm",
+                "Referer": "https://mp.weixin.qq.com/"
+            }
+            
+            # 使用selenium打开文章链接，设置请求头
+            driver.execute_cdp_cmd('Network.setExtraHTTPHeaders', {'headers': wechat_headers})
+            driver.get(article_url)
+            # 增加等待时间，确保页面完全加载
+            time.sleep(5)
+            
+            # 检查是否需要登录
+            if "请在微信客户端中打开链接" in driver.page_source or "请在微信中打开此链接" in driver.page_source:
+                print(f"文章需要在微信中打开，尝试使用requests直接获取：{article_url}")
+                # 尝试使用requests直接获取
+                response = requests.get(article_url, headers=wechat_headers, cookies=cookies, timeout=30)  # bounded wait so one stalled article cannot hang the crawl
+                if "请在微信客户端中打开链接" in response.text or "请在微信中打开此链接" in response.text:
+                    print(f"使用requests仍然无法获取，跳过此文章：{article_url}")
+                    continue
+                else:
+                    # 保存获取到的HTML内容
+                    filename = f"article_{article_url.split('sn=')[1][:10] if 'sn=' in article_url else 'unknown'}"
+                    save_path = f"{logs_dir}/{filename}.html"
+                    with open(save_path, "w", encoding="utf-8") as f:
+                        f.write(response.text)
+                    print(f"已保存文章HTML内容：{save_path}")
+                    continue
+                
+            # 使用更可靠的选择器查找标题和内容
+            try:
+                # 尝试多种可能的标题选择器
+                title_selectors = [
+                    '//h1[@class="rich_media_title"]', 
+                    '//h1[@id="activity-name"]',
+                    '//h2[@class="rich_media_title"]',
+                    '//div[@class="rich_media_content"]//h1',
+                    '//div[@id="js_article"]//h1'
+                ]
+                
+                title = None
+                for selector in title_selectors:
+                    try:
+                        title_element = driver.find_element('xpath', selector)
+                        title = title_element.text.strip()
+                        if title:
+                            break
+                    except:
+                        continue
+                
+                if not title:
+                    # 如果所有选择器都失败，尝试从页面标题获取
+                    title = driver.title.replace(" - 微信公众号", "").strip()
+                
+                # 尝试多种可能的内容选择器
+                content_selectors = [
+                    '//div[@class="rich_media_content"]',
+                    '//div[@id="js_content"]',
+                    '//div[@class="rich_media_wrp"]'
+                ]
+                
+                content = None
+                for selector in content_selectors:
+                    try:
+                        content_element = driver.find_element('xpath', selector)
+                        content = content_element.text.strip()
+                        if content:
+                            break
+                    except:
+                        continue
+                
+                if not content:
+                    # 如果无法获取内容，至少保存页面源码
+                    content = "无法提取正文内容，保存页面源码：\n" + driver.page_source
+                
+                # 创建文件名（使用标题，但去除不合法的文件名字符）
+                if not title:
+                    title = "未知标题_" + article_url.split("sn=")[1][:10] if "sn=" in article_url else "未知标题"
+                    
+                filename = re.sub(r'[\\/:*?"<>|]', '_', title)
+                
+                # 保存文章内容到文件
+                save_path = f"{logs_dir}/{filename}.txt"
+                with open(save_path, "w", encoding="utf-8") as f:
+                    f.write(f"标题：{title}\n\n")
+                    f.write(f"链接：{article_url}\n\n")
+                    f.write(f"内容：\n{content}")
+                    
+                print(f"文章《{title}》保存成功：{save_path}")
+                
+            except Exception as e:
+                print(f"提取文章内容失败：{str(e)}")
+                # 保存页面源码以便分析
+                error_filename = "error_" + article_url.split("sn=")[1][:10] if "sn=" in article_url else "error_page"
+                error_path = f"{logs_dir}/{error_filename}.html"
+                with open(error_path, "w", encoding="utf-8") as f:
+                    f.write(driver.page_source)
+                print(f"已保存页面源码到：{error_path}")
+            
+            # 避免频繁请求被封
+            time.sleep(random.uniform(3, 7))
+            
+        except Exception as e:
+            print(f"爬取文章失败：{article_url}，错误信息：{str(e)}")
+            continue
+    
+    # 关闭浏览器
+    driver.quit()
+    print("所有文章爬取完成！")
 

From 674d27e936c2342d398841ba9f96ab9b1579443d Mon Sep 17 00:00:00 2001
From: "Kalman.CHENG" <123204464@qq.com>
Date: Mon, 14 Jul 2025 15:49:52 +0800
Subject: [PATCH 07/46] =?UTF-8?q?=E6=95=99=E8=82=B2=E5=9E=82=E7=9B=B4?=
 =?UTF-8?q?=E9=A2=86=E5=9F=9F=E5=A4=A7=E6=A8=A1=E5=9E=8B=E5=B9=B3=E5=8F=B0?=
 =?UTF-8?q?=20modify=20by=20Kalman.CHENG=20=E2=98=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../api/controller/DmController.py            |   6 +-
 .../api/controller/DocumentController.py      | 127 +++++++++-
 .../api/controller/LoginController.py         |   7 +-
 .../api/controller/ThemeController.py         |  73 +++++-
 dsAiTeachingModel/config/Config.py            |  36 +--
 dsAiTeachingModel/utils/Database.py           | 230 ++++++++++++------
 dsAiTeachingModel/utils/JwtUtil.py            |   2 +-
 dsAiTeachingModel/utils/LightRagUtil.py       |   2 +-
 dsAiTeachingModel/utils/PageUtil.py           |  48 ++++
 9 files changed, 423 insertions(+), 108 deletions(-)
 create mode 100644 dsAiTeachingModel/utils/PageUtil.py

diff --git a/dsAiTeachingModel/api/controller/DmController.py b/dsAiTeachingModel/api/controller/DmController.py
index 41583878..7dbd0e51 100644
--- a/dsAiTeachingModel/api/controller/DmController.py
+++ b/dsAiTeachingModel/api/controller/DmController.py
@@ -13,11 +13,11 @@ router = APIRouter(dependencies=[Depends(get_current_user)])
 async def get_stage_subject_list():
     # 先查询学段list
     select_stage_sql: str = "select stage_id, stage_name from t_dm_stage where b_use = 1 order by sort_id;"
-    stage_list = await find_by_sql(select_stage_sql, ())
+    stage_list = await find_by_sql(select_stage_sql,())
     for stage in stage_list:
         # 再查询学科list
-        select_subject_sql: str = "select subject_id, subject_name from t_dm_subject where stage_id = %s order by sort_id;"
-        subject_list = await find_by_sql(select_subject_sql, (stage["stage_id"],))
+        select_subject_sql: str = "select subject_id, subject_name from t_dm_subject where stage_id = " + str(stage["stage_id"]) + " order by sort_id;"
+        subject_list = await find_by_sql(select_subject_sql,())
         stage["subject_list"] = subject_list
 
     return {"success": True, "message": "成功！", "data": stage_list}
diff --git a/dsAiTeachingModel/api/controller/DocumentController.py b/dsAiTeachingModel/api/controller/DocumentController.py
index 34576442..d88da710 100644
--- a/dsAiTeachingModel/api/controller/DocumentController.py
+++ b/dsAiTeachingModel/api/controller/DocumentController.py
@@ -1,13 +1,132 @@
 # routes/LoginController.py
+import os
 
-from fastapi import APIRouter, Request, Response, Depends
+from fastapi import APIRouter, Request, Response, Depends, UploadFile, File
 
 from auth.dependencies import get_current_user
+from utils.PageUtil import *
+from utils.ParseRequest import *
 
 # 创建一个路由实例,需要依赖get_current_user,登录后才能访问
 router = APIRouter(dependencies=[Depends(get_current_user)])
 
+# 创建上传文件的目录
+UPLOAD_DIR = "upload_file"
+if not os.path.exists(UPLOAD_DIR):
+    os.makedirs(UPLOAD_DIR)
 
-@router.get("/")
-async def test(request: Request, response: Response):
-    return {"success": True, "message": "成功！"}
+# 合法文件扩展名
+supported_suffix_types = ['doc', 'docx', 'ppt', 'pptx', 'xls', 'xlsx']
+
+# 【Document-1】文档管理列表
+@router.get("/list")
+async def list(request: Request):
+    # 获取参数
+    person_id = await get_request_str_param(request, "person_id", True, True)
+    stage_id = await get_request_num_param(request, "stage_id", False, True, -1)
+    subject_id = await get_request_num_param(request, "subject_id", False, True, -1)
+    document_suffix = await get_request_str_param(request, "document_suffix", False, True)
+    document_name = await get_request_str_param(request, "document_name", False, True)
+    page_number = await get_request_num_param(request, "page_number", False, True, 1)
+    page_size = await get_request_num_param(request, "page_size", False, True, 10)
+
+    print(person_id, stage_id, subject_id, document_suffix, document_name, page_number, page_size)
+
+    # Build the query; the pager only accepts a finished SQL string, so user text is inlined with
+    # single quotes doubled (assumes PostgreSQL's default standard_conforming_strings = on).
+    select_document_sql: str = " SELECT * FROM t_ai_teaching_model_document WHERE is_deleted = 0 and person_id = '" + person_id.replace("'", "''") + "'"
+    if stage_id != -1:
+        select_document_sql += " AND stage_id = " + str(stage_id)
+    if subject_id != -1:
+        select_document_sql += " AND subject_id = " + str(subject_id)
+    if document_suffix != "":
+        select_document_sql += " AND document_suffix = '" + document_suffix.replace("'", "''") + "'"
+    if document_name != "":
+        select_document_sql += " AND document_name = '" + document_name.replace("'", "''") + "'"
+    select_document_sql += " ORDER BY create_time DESC "
+
+    # 查询文档列表
+    page = await get_page_data_by_sql(select_document_sql, page_number, page_size)
+    for item in page["list"]:
+        theme_info = await find_by_id("t_ai_teaching_model_theme", "id", item["theme_id"])
+        item["theme_info"] = theme_info
+
+    return {"success": True, "message": "查询成功！", "data": page}
+
+
+# 【Document-2】保存文档管理
+@router.post("/save")
+async def save(request: Request, file: UploadFile = File(...)):
+    # 获取参数
+    id = await get_request_num_param(request, "id", False, True, 0)
+    stage_id = await get_request_num_param(request, "stage_id", False, True, -1)
+    subject_id = await get_request_num_param(request, "subject_id", False, True, -1)
+    theme_id = await get_request_num_param(request, "theme_id", True, True, None)
+    person_id = await get_request_str_param(request, "person_id", True, True)
+    bureau_id = await get_request_str_param(request, "bureau_id", True, True)
+    # 先获取theme主题信息
+    theme_object = await find_by_id("t_ai_teaching_model_theme", "id", theme_id)
+    if theme_object is None:
+        return {"success": False, "message": "主题不存在！"}
+    # 获取文件名
+    document_name = os.path.basename(file.filename or "")  # drop client-supplied directory parts (path traversal)
+    # Check for an existing non-deleted document with this name (NOTE(review): not filtered by theme_id, although the message says "in this theme").
+    select_theme_document_sql: str = "SELECT * FROM t_ai_teaching_model_document WHERE is_deleted = 0 and document_name = $1"
+    if id != 0:
+        select_theme_document_sql += " AND id <> " + str(id)  # str() so a numeric id can be concatenated
+    theme_document = await find_by_sql(select_theme_document_sql, (document_name,))  # name is bound as $1, never concatenated
+    if theme_document is not None:
+        return {"success": False, "message": "该主题下文档名称重复！"}
+    # Extract the file extension (text after the last dot).
+    document_suffix = document_name.split(".")[-1]
+    # Reject extensions that are not in the whitelist.
+    if document_suffix not in supported_suffix_types:
+        return {"success": False, "message": "不支持的文件类型！"}
+    # Build the per-theme storage directory.
+    document_dir = UPLOAD_DIR + os.sep + str(theme_object["short_name"]) + "_" + str(theme_object["id"]) + os.sep
+    if not os.path.exists(document_dir):
+        os.makedirs(document_dir)
+    document_path = os.path.join(document_dir, document_name)
+    # 保存文件
+    try:
+        with open(document_path, "wb") as buffer:
+            buffer.write(await file.read())
+    except Exception as e:
+        return {"success": False, "message": f"文件保存失败！{e}"}
+
+    # 构造保存文档SQL语句
+    param = {"stage_id": stage_id, "subject_id": subject_id, "document_name": document_name, "theme_id": theme_id, "document_path": document_path, "document_suffix": document_suffix, "person_id": person_id, "bureau_id": bureau_id}
+
+    # 保存数据
+    if id == 0:
+        param["train_flag"] = 0
+        # 插入数据
+        id = await insert("t_ai_teaching_model_document", param, False)
+        return {"success": True, "message": "保存成功！", "data": {"insert_id" : id}}
+    else:
+        # 更新数据
+        await update("t_ai_teaching_model_document", param, "id", id)
+        return {"success": True, "message": "更新成功！", "data": {"update_id" : id}}
+
+# 【Document-3】获取文档信息
+@router.get("/get")
+async def get(request: Request):
+    # 获取参数
+    id = await get_request_num_param(request, "id", True, True, None)
+    # 查询数据
+    document_object = await find_by_id("t_ai_teaching_model_document", "id", id)
+    if document_object is None:
+        return {"success": False, "message": "未查询到该文档信息！"}
+    theme_info = await find_by_id("t_ai_teaching_model_theme", "id", document_object["theme_id"])
+    document_object["theme_info"] = theme_info
+    return {"success": True, "message": "查询成功！", "data": {"document": document_object}}
+
+
+@router.post("/delete")
+async def delete(request: Request):
+    # 获取参数
+    id = await get_request_num_param(request, "id", True, True, None)
+    result = await delete_by_id("t_ai_teaching_model_document", "id", id)
+    if not result:
+        return {"success": False, "message": "删除失败！"}
+    return {"success": True, "message": "删除成功！"}
\ No newline at end of file
diff --git a/dsAiTeachingModel/api/controller/LoginController.py b/dsAiTeachingModel/api/controller/LoginController.py
index 4a004d45..307fd3b6 100644
--- a/dsAiTeachingModel/api/controller/LoginController.py
+++ b/dsAiTeachingModel/api/controller/LoginController.py
@@ -13,7 +13,7 @@ from utils.CookieUtil import *
 from utils.Database import *
 from utils.JwtUtil import *
 from utils.ParseRequest import *
-from config.Config import *
+from Config.Config import *
 
 # 创建一个路由实例
 router = APIRouter()
@@ -108,8 +108,9 @@ async def login(request: Request, response: Response):
         return {"success": False, "message": "用户名和密码不能为空"}
 
     password = md5_encrypt(password)
-    select_user_sql: str = "SELECT person_id, person_name, identity_id, login_name, xb, bureau_id, org_id, pwdmd5 FROM t_sys_loginperson WHERE login_name = %s AND b_use = 1"
-    user = await find_one_by_sql(select_user_sql, (username,))
+    select_user_sql: str = "SELECT person_id, person_name, identity_id, login_name, xb, bureau_id, org_id, pwdmd5 FROM t_sys_loginperson WHERE login_name = $1 AND b_use = 1"
+    userlist = await find_by_sql(select_user_sql, (username,))  # username is bound as $1 instead of being concatenated into the SQL
+    user = userlist[0] if userlist else None
     logging.info(f"查询结果: {user}")
     if user and user['pwdmd5'] == password:  # 验证的cas用户密码，md5加密的版本
         token = create_access_token({"user_id": user['person_id'], "identity_id": user['identity_id']})
diff --git a/dsAiTeachingModel/api/controller/ThemeController.py b/dsAiTeachingModel/api/controller/ThemeController.py
index 26903275..297817d1 100644
--- a/dsAiTeachingModel/api/controller/ThemeController.py
+++ b/dsAiTeachingModel/api/controller/ThemeController.py
@@ -3,12 +3,15 @@
 from fastapi import APIRouter, Depends
 from utils.ParseRequest import *
 from auth.dependencies import *
-from utils.Database import *
+from utils.PageUtil import *
 
 # 创建一个路由实例,需要依赖get_current_user,登录后才能访问
 router = APIRouter(dependencies=[Depends(get_current_user)])
 
-
+# 功能：【Theme-1】主题管理列表
+# 作者：Kalman.CHENG ☆
+# 时间：2025-07-14
+# 备注：
 @router.get("/list")
 async def list(request: Request):
     # 获取参数
@@ -24,9 +27,9 @@ async def list(request: Request):
     # 拼接查询SQL语句
     select_theme_sql: str = " SELECT * FROM t_ai_teaching_model_theme WHERE is_deleted = 0 and person_id = '" + person_id + "'"
     if stage_id != -1:
-        select_theme_sql += " and stage_id = " + stage_id
+        select_theme_sql += " and stage_id = " + str(stage_id)
     if subject_id != -1:
-        select_theme_sql += " and subject_id = " + subject_id
+        select_theme_sql += " and subject_id = " + str(subject_id)
     if theme_name != "":
         select_theme_sql += " and theme_name = '" + theme_name + "'"
     select_theme_sql += " ORDER BY create_time DESC"
@@ -37,16 +40,76 @@ async def list(request: Request):
     return {"success": True, "message": "查询成功！", "data": page}
 
 
+# 功能：【Theme-2】保存主题管理
+# 作者：Kalman.CHENG ☆
+# 时间：2025-07-14
+# 备注：
 @router.post("/save")
 async def save(request: Request):
     # 获取参数
     id = await get_request_num_param(request, "id", False, True, 0)
     theme_name = await get_request_str_param(request, "theme_name", True, True)
+    short_name = await get_request_str_param(request, "short_name", True, True)
     theme_icon = await get_request_str_param(request, "theme_icon", False, True)
     stage_id = await get_request_num_param(request, "stage_id", True, True, None)
     subject_id = await get_request_num_param(request, "subject_id", True, True, None)
     person_id = await get_request_str_param(request, "person_id", True, True)
     bureau_id = await get_request_str_param(request, "bureau_id", True, True)
-    # 业务逻辑处理
+
+    # 校验参数
+    check_theme_sql = "SELECT theme_name FROM t_ai_teaching_model_theme WHERE is_deleted = 0 and bureau_id = $1 and theme_name = $2"
+    if id != 0:
+        check_theme_sql += " and id <> " + str(id)  # str() so a numeric id can be concatenated
+    print(check_theme_sql)
+    check_theme_result = await find_by_sql(check_theme_sql, (bureau_id, theme_name))  # request text is bound, not concatenated
+    if check_theme_result:
+        return {"success": False, "message": "该主题名称已存在！"}
+
+    check_short_name_sql = "SELECT short_name FROM t_ai_teaching_model_theme WHERE is_deleted = 0 and bureau_id = $1 and short_name = $2"
+    if id != 0:
+        check_short_name_sql += " and id <> " + str(id)  # str() so a numeric id can be concatenated
+    print(check_short_name_sql)
+    check_short_name_result = await find_by_sql(check_short_name_sql, (bureau_id, short_name))  # request text is bound, not concatenated
+    if check_short_name_result:
+        return {"success": False, "message": "该主题英文简称已存在！"}
+
+    # 组装参数
+    param = {"theme_name": theme_name,"short_name": short_name,"theme_icon": theme_icon,"stage_id": stage_id,"subject_id": subject_id,"person_id": person_id,"bureau_id": bureau_id}
+
+    # 保存数据
+    if id == 0:
+        param["search_flag"] = 0
+        param["train_flag"] = 0
+        # 插入数据
+        id = await insert("t_ai_teaching_model_theme", param, False)
+        return {"success": True, "message": "保存成功！", "data": {"insert_id" : id}}
+    else:
+        # 更新数据
+        await update("t_ai_teaching_model_theme", param, "id", id, False)
+        return {"success": True, "message": "更新成功！", "data": {"update_id" : id}}
+
+
+# 功能：【Theme-3】获取主题信息
+# 作者：Kalman.CHENG ☆
+# 时间：2025-07-14
+# 备注：
+@router.get("/get")
+async def get(request: Request):
+    # 获取参数
+    id = await get_request_num_param(request, "id", True, True, None)
+    theme_obj = await find_by_id("t_ai_teaching_model_theme", "id", id)
+    if theme_obj is None:
+        return {"success": False, "message": "未查询到该主题信息！"}
+    return {"success": True, "message": "查询成功！", "data": {"theme": theme_obj}}
+
+
+@router.post("/delete")
+async def delete(request: Request):
+    # 获取参数
+    id = await get_request_num_param(request, "id", True, True, None)
+    result = await delete_by_id("t_ai_teaching_model_theme", "id", id)
+    if not result:
+        return {"success": False, "message": "删除失败！"}
+    return {"success": True, "message": "删除成功！"}
 
 
diff --git a/dsAiTeachingModel/config/Config.py b/dsAiTeachingModel/config/Config.py
index 3d4a460b..1b4ca3f3 100644
--- a/dsAiTeachingModel/config/Config.py
+++ b/dsAiTeachingModel/config/Config.py
@@ -1,13 +1,18 @@
-# 大模型 【DeepSeek深度求索官方】
-#LLM_API_KEY = "sk-44ae895eeb614aa1a9c6460579e322f1"
-#LLM_BASE_URL = "https://api.deepseek.com"
-#LLM_MODEL_NAME = "deepseek-chat"
+# 阿里云的配置信息
+ALY_AK = 'LTAI5tE4tgpGcKWhbZg6C4bh'
+ALY_SK = 'oizcTOZ8izbGUouboC00RcmGE8vBQ1'
 
-# 阿里云提供的大模型服务
-LLM_API_KEY="sk-f6da0c787eff4b0389e4ad03a35a911f"
+# 大模型 【DeepSeek深度求索官方】训练时用这个
+# LLM_API_KEY = "sk-44ae895eeb614aa1a9c6460579e322f1"
+# LLM_BASE_URL = "https://api.deepseek.com"
+# LLM_MODEL_NAME = "deepseek-chat"
+
+# 阿里云提供的大模型服务 【阿里云在处理文字材料时，容易引发绿网拦截，导致数据上报异常】
+LLM_API_KEY = "sk-f6da0c787eff4b0389e4ad03a35a911f"
 LLM_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
 #LLM_MODEL_NAME = "qwen-plus" # 不要使用通义千问，会导致化学方程式不正确！
 LLM_MODEL_NAME = "deepseek-v3"
+#LLM_MODEL_NAME = "deepseek-r1"  # 使用更牛B的r1模型
 
 EMBED_MODEL_NAME = "BAAI/bge-m3"
 EMBED_API_KEY = "sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl"
@@ -15,21 +20,20 @@ EMBED_BASE_URL = "https://api.siliconflow.cn/v1"
 EMBED_DIM = 1024
 EMBED_MAX_TOKEN_SIZE = 8192
 
-
 NEO4J_URI = "bolt://localhost:7687"
 NEO4J_USERNAME = "neo4j"
 NEO4J_PASSWORD = "DsideaL147258369"
+NEO4J_AUTH = (NEO4J_USERNAME, NEO4J_PASSWORD)
 
-
-# MYSQL配置信息
-MYSQL_HOST = "127.0.0.1"
-MYSQL_PORT = 22066
-MYSQL_USER = "root"
-MYSQL_PASSWORD = "DsideaL147258369"
-MYSQL_DB_NAME = "base_db"
-MYSQL_POOL_SIZE = 200
+# POSTGRESQL配置信息
+AGE_GRAPH_NAME = "dickens"
+POSTGRES_HOST = "10.10.14.208"
+POSTGRES_PORT = 5432
+POSTGRES_USER = "postgres"
+POSTGRES_PASSWORD = "postgres"
+POSTGRES_DATABASE = "rag"
 
 # JWT配置信息
 JWT_SECRET_KEY = "ZXZnZWVr5b+r5LmQ5L2g55qE5Ye66KGM"
 ALGORITHM = "HS256"
-ACCESS_TOKEN_EXPIRE_MINUTES = 300000  # 访问令牌过期时间（分钟）
+ACCESS_TOKEN_EXPIRE_MINUTES = 300000  # 访问令牌过期时间（分钟）
\ No newline at end of file
diff --git a/dsAiTeachingModel/utils/Database.py b/dsAiTeachingModel/utils/Database.py
index db606938..85580029 100644
--- a/dsAiTeachingModel/utils/Database.py
+++ b/dsAiTeachingModel/utils/Database.py
@@ -1,25 +1,23 @@
 # Database.py
+import datetime
 import logging
-import math
+import asyncpg
 
-import aiomysql
-import asyncio
-from config.Config import *
+from Config.Config import *
 
 # 创建一个全局的连接池
 pool = None
 
-async def create_pool(loop):
+async def create_pool():
     global pool
-    pool = await aiomysql.create_pool(
-        host=MYSQL_HOST,
-        port=MYSQL_PORT,
-        user=MYSQL_USER,
-        password=MYSQL_PASSWORD,
-        db=MYSQL_DB_NAME,
-        minsize=1,  # 设置连接池最小连接数
-        maxsize=MYSQL_POOL_SIZE,  # 设置连接池最大连接数
-        cursorclass=aiomysql.DictCursor  # 指定游标为字典模式
+    pool = await asyncpg.create_pool(
+        host=POSTGRES_HOST,
+        port=POSTGRES_PORT,
+        user=POSTGRES_USER,
+        password=POSTGRES_PASSWORD,
+        database=POSTGRES_DATABASE,
+        min_size=1,  # 设置连接池最小连接数
+        max_size=100  # 设置连接池最大连接数
     )
 
 async def get_connection():
@@ -30,18 +28,17 @@ async def get_connection():
 
 async def close_pool():
     if pool is not None:
-        pool.close()
-        await pool.wait_closed()
+        await pool.close()
 
 # 初始化连接池的函数
 async def init_database():
-    loop = asyncio.get_event_loop()
-    await create_pool(loop)
+    await create_pool()
 
 # 关闭连接池的函数
 async def shutdown_database():
     await close_pool()
 
+
 # 根据sql语句查询数据
 async def find_by_sql(sql: str, params: tuple):
     if pool is None:
@@ -49,79 +46,162 @@ async def find_by_sql(sql: str, params: tuple):
         return None
     try:
         async with pool.acquire() as conn:
-            async with conn.cursor() as cur:
-                await cur.execute(sql, params)
-                result = await cur.fetchall()
-        if result:
-            return result
-        else:
-            return None
+            result = await conn.fetch(sql, *params)
+            # 将 asyncpg.Record 转换为字典
+            result_dict = [dict(record) for record in result]
+            if result_dict:
+                return result_dict
+            else:
+                return None
     except Exception as e:
         logging.error(f"数据库查询错误: {e}")
         return None
 
+# 插入数据
+async def insert(tableName, param, onlyForParam=False):
+    current_time = datetime.datetime.now()
+    columns = []
+    values = []
+    placeholders = []
+
+    for key, value in param.items():
+        if value is not None:
+            if isinstance(value, (int, float)):
+                columns.append(key)
+                values.append(value)
+                placeholders.append(f"${len(values)}")
+            elif isinstance(value, str):
+                columns.append(key)
+                values.append(value)
+                placeholders.append(f"${len(values)}")
+        else:
+            columns.append(key)
+            values.append(None)  # NULL is bound as a real parameter so $n numbering stays aligned with `values`
+            placeholders.append(f"${len(values)}")
+
+    if not onlyForParam:
+        if 'is_deleted' not in param:
+            columns.append("is_deleted")
+            values.append(0)
+            placeholders.append(f"${len(values)}")
+
+        if 'create_time' not in param:
+            columns.append("create_time")
+            values.append(current_time)
+            placeholders.append(f"${len(values)}")
+
+        if 'update_time' not in param:
+            columns.append("update_time")
+            values.append(current_time)
+            placeholders.append(f"${len(values)}")
+
+    # 构造 SQL 语句
+    column_names = ", ".join(columns)
+    placeholder_names = ", ".join(placeholders)
+    sql = f"INSERT INTO {tableName} ({column_names}) VALUES ({placeholder_names}) RETURNING id"
 
-# 根据sql语句查询数据
-async def find_one_by_sql(sql: str, params: tuple):
-    if pool is None:
-        logging.error("数据库连接池未创建")
-        return None
     try:
         async with pool.acquire() as conn:
-            async with conn.cursor() as cur:
-                await cur.execute(sql, params)
-                result = await cur.fetchone()
+            result = await conn.fetchrow(sql, *values)
         if result:
-            return result
+            return result['id']
         else:
+            logging.error("插入数据失败: 未返回ID")
             return None
     except Exception as e:
         logging.error(f"数据库查询错误: {e}")
-        return None
+        logging.error(f"执行的SQL语句: {sql}")
+        logging.error(f"参数: {values}")
+        raise Exception(f"为表[{tableName}]插入数据失败: {e}")
+
+
+# 更新数据
+async def update(table_name, param, property_name, property_value, only_for_param=False):
+    current_time = datetime.datetime.now()
+    set_clauses = []
+    values = []
+
+    # 处理要更新的参数
+    for key, value in param.items():
+        if value is not None:
+            if isinstance(value, (int, float)):
+                set_clauses.append(f"{key} = ${len(values) + 1}")
+                values.append(value)
+            elif isinstance(value, str):
+                set_clauses.append(f"{key} = ${len(values) + 1}")
+                values.append(value)
+        else:
+            set_clauses.append(f"{key} = ${len(values) + 1}")  # bind NULL as a parameter so $n numbering matches `values`
+            values.append(None)
+
+    if not only_for_param:
+        if 'update_time' not in param:
+            set_clauses.append(f"update_time = ${len(values) + 1}")
+            values.append(current_time)
 
-# 查询数据条数
-async def get_total_data_count(total_data_sql):
-    total_data_count = 0
-    total_data_count_sql = "select count(1) as count from (" + total_data_sql + ") as temp_table"
-    result = await find_one_by_sql(total_data_count_sql, ())
-    if result:
-        total_data_count = result.get("count")
-    return total_data_count
+    # 构造 SQL 语句
+    set_clause = ", ".join(set_clauses)
+    sql = f"UPDATE {table_name} SET {set_clause} WHERE {property_name} = ${len(values) + 1} RETURNING id"
+    print(sql)
 
+    # 添加条件参数
+    values.append(property_value)
 
-def get_page_by_total_row(total_data_count, page_number, page_size):
-    total_page = (page_size != 0) and math.floor((total_data_count + page_size - 1) / page_size) or 0
-    if page_number <= 0:
-        page_number = 1
-    if 0 < total_page < page_number:
-        page_number = total_page
-    offset = page_size * page_number - page_size
-    limit = page_size
-    return total_data_count, total_page, offset, limit
+    try:
+        async with pool.acquire() as conn:
+            result = await conn.fetchrow(sql, *values)
+            if result:
+                return result['id']
+            else:
+                logging.error("更新数据失败: 未返回ID")
+                return None
+    except Exception as e:
+        logging.error(f"数据库查询错误: {e}")
+        logging.error(f"执行的SQL语句: {sql}")
+        logging.error(f"参数: {values}")
+        raise Exception(f"为表[{table_name}]更新数据失败: {e}")
 
 
-async def get_page_data_by_sql(total_data_sql: str, page_number: int, page_size: int):
-    if pool is None:
-        logging.error("数据库连接池未创建")
-        return None
-    total_row: int = 0
-    total_page: int = 0
-    total_data_sql = total_data_sql.replace(";", "")
-    total_data_sql = total_data_sql.replace(" FROM ", " from ")
-
-    # 查询总数
-    total_data_count = await get_total_data_count(total_data_sql)
-    if total_data_count == 0:
-        return {"page_number": page_number, "page_size": page_size, "total_row": 0, "total_page": 0, "list": []}
+
+# 获取Bean
+# 通过主键查询
+async def find_by_id(table_name, property_name, property_value):
+    if table_name and property_name and property_value is not None:
+        # 构造 SQL 语句
+        sql = f"SELECT * FROM {table_name} WHERE is_deleted = 0 AND {property_name} = $1"
+        logging.debug(sql)
+
+        # 执行查询
+        result = await find_by_sql(sql, (property_value,))
+        if not result:
+            logging.error("查询失败: 未找到数据")
+            return None
+        # 返回第一条数据
+        return result[0]
     else:
-        total_row, total_page, offset, limit = get_page_by_total_row(total_data_count, page_number, page_size)
-
-    # 构造执行分页查询的sql语句
-    page_data_sql = total_data_sql + " LIMIT %d, %d " % (offset, limit)
-    print(page_data_sql)
-    # 执行分页查询
-    page_data = await find_by_sql(page_data_sql, ())
-    if page_data:
-        return {"page_number": page_number, "page_size": page_size, "total_row": total_row, "total_page": total_page, "list": page_data}
+        logging.error("参数不全")
+        return None
+
+# 通过主键删除
+# 逻辑删除
+async def delete_by_id(table_name, property_name, property_value):
+    if table_name and property_name and property_value is not None:
+        sql = f"UPDATE {table_name} SET is_deleted = 1, update_time = now() WHERE {property_name} = $1 and is_deleted = 0"
+        logging.debug(sql)
+        # 执行删除
+        try:
+            async with pool.acquire() as conn:
+                result = await conn.execute(sql, property_value)  # asyncpg returns the command status string, e.g. "UPDATE 1"
+                if result != "UPDATE 0":  # a bare truthiness test always passes, even when no row matched
+                    return True
+                else:
+                    logging.error("删除失败: 未找到数据")
+                    return False
+        except Exception as e:
+            logging.error(f"数据库查询错误: {e}")
+            logging.error(f"执行的SQL语句: {sql}")
+            logging.error(f"参数: {property_value}")
+            raise Exception(f"为表[{table_name}]删除数据失败: {e}")
     else:
-        return {"page_number": page_number, "page_size": page_size, "total_row": 0, "total_page": 0, "list": []}
+        logging.error("参数不全")
+        return False
\ No newline at end of file
diff --git a/dsAiTeachingModel/utils/JwtUtil.py b/dsAiTeachingModel/utils/JwtUtil.py
index 4118a695..90b30808 100644
--- a/dsAiTeachingModel/utils/JwtUtil.py
+++ b/dsAiTeachingModel/utils/JwtUtil.py
@@ -2,7 +2,7 @@
 
 from datetime import datetime, timedelta
 from jose import JWTError, jwt
-from config.Config import *
+from Config.Config import *
 
 
 def create_access_token(data: dict):
diff --git a/dsAiTeachingModel/utils/LightRagUtil.py b/dsAiTeachingModel/utils/LightRagUtil.py
index 4b038c1d..528f5963 100644
--- a/dsAiTeachingModel/utils/LightRagUtil.py
+++ b/dsAiTeachingModel/utils/LightRagUtil.py
@@ -8,7 +8,7 @@ from lightrag import LightRAG
 from lightrag.kg.shared_storage import initialize_pipeline_status
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
 from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug
-from config.Config import *
+from Config.Config import *
 
 
 async def print_stream(stream):
diff --git a/dsAiTeachingModel/utils/PageUtil.py b/dsAiTeachingModel/utils/PageUtil.py
new file mode 100644
index 00000000..ae84601e
--- /dev/null
+++ b/dsAiTeachingModel/utils/PageUtil.py
@@ -0,0 +1,48 @@
+import math
+from utils.Database import *
+
+
+# Count the rows that total_data_sql would return.
+async def get_total_data_count(total_data_sql):
+    # Wrap the caller's query in a count(*) subquery so it is counted without being rewritten
+    count_sql = "".join(("select count(*) as num from (", total_data_sql, ") as temp_table"))
+    rows = await find_by_sql(count_sql, ())
+    # No rows, or a falsy first row, counts as zero
+    if not rows or not rows[0]:
+        return 0
+    return rows[0].get("num")
+
+
+def get_page_by_total_row(total_data_count, page_number, page_size):
+    """Return (total_row, total_page, offset, limit) for the requested page."""
+    # Ceiling division; a zero page_size produces zero pages instead of dividing by zero
+    total_page = math.floor((total_data_count + page_size - 1) / page_size) if page_size != 0 else 0
+    # Page numbers below 1 fall back to the first page
+    current = max(page_number, 1)
+    # Page numbers past the end snap to the last page (only when at least one page exists)
+    current = total_page if 0 < total_page < current else current
+    return total_data_count, total_page, page_size * (current - 1), page_size
+
+
+async def get_page_data_by_sql(total_data_sql: str, page_number: int, page_size: int):
+    """Run total_data_sql as a paged query and return the page plus paging metadata.
+
+    Returns a dict with the keys page_number, page_size, total_row, total_page and list.
+    """
+    # Drop every ";" and normalise " FROM " to " from " before the SQL is embedded in larger statements
+    base_sql = total_data_sql.replace(";", "").replace(" FROM ", " from ")
+    empty_page = {"page_number": page_number, "page_size": page_size, "total_row": 0, "total_page": 0, "list": []}
+
+    # Total number of rows the query yields without paging
+    row_count = await get_total_data_count(base_sql)
+    if row_count == 0:
+        return empty_page
+    total_row, total_page, offset, limit = get_page_by_total_row(row_count, page_number, page_size)
+
+    # Append the paging clause and fetch the requested page
+    page_data_sql = base_sql + " LIMIT %d offset %d " % (limit, offset)
+    print(page_data_sql)
+    page_data = await find_by_sql(page_data_sql, ())
+    if not page_data:
+        return empty_page
+    return {"page_number": page_number, "page_size": page_size, "total_row": total_row, "total_page": total_page, "list": page_data}

From 0f161ab1c3f11536f555679f93b96a8ba565bc22 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 08:58:55 +0800
Subject: [PATCH 08/46] 'commit'

---
 dsLightRag/Test/T1_Login.py   | 162 ++++++++++++++++++++
 dsLightRag/Test/T2_GetList.py | 162 ++++++++++++++++++++
 dsLightRag/Test/TestCrawl.py  | 270 ----------------------------------
 3 files changed, 324 insertions(+), 270 deletions(-)
 create mode 100644 dsLightRag/Test/T1_Login.py
 create mode 100644 dsLightRag/Test/T2_GetList.py
 delete mode 100644 dsLightRag/Test/TestCrawl.py

diff --git a/dsLightRag/Test/T1_Login.py b/dsLightRag/Test/T1_Login.py
new file mode 100644
index 00000000..6db1e6f1
--- /dev/null
+++ b/dsLightRag/Test/T1_Login.py
@@ -0,0 +1,162 @@
+# 详解（一）Python + Selenium 批量采集微信公众号，搭建自己的微信公众号每日AI简报，告别信息焦虑
+# https://blog.csdn.net/k352733625/article/details/149222945
+
+# 微信爬爬猫---公众号文章抓取代码分析
+# https://blog.csdn.net/yajuanpi4899/article/details/121584268
+
+"""
+安装pdfkit库
+复制
+pip3 install pdfkit -i http://pypi.douban.com/simple --trusted-host pypi.douban.com
+1.
+import pdfkit
+pdfkit.from_url('公众号文章地址', 'out.pdf')
+"""
+import datetime
+import logging
+import random
+import re
+
+import requests
+
+"""
+# 查看selenium版本
+pip show selenium
+4.34.2
+
+# 查看Chrome浏览器版本
+chrome://version/
+138.0.7204.101 (正式版本) （64 位）
+
+# 下载驱动包
+https://googlechromelabs.github.io/chrome-for-testing/
+https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
+"""
+import time
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service as ChromeService
+
+if __name__ == '__main__':
+    # 定义一个空的字典，存放cookies内容
+    cookies = {}
+    # 设置headers - 使用微信内置浏览器的User-Agent
+    header = {
+        "HOST": "mp.weixin.qq.com",
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+        "Accept-Encoding": "gzip, deflate, br",
+        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4",
+        "Connection": "keep-alive"
+    }
+    # 用webdriver启动谷歌浏览器
+    logging.info("启动浏览器，打开微信公众号登录界面")
+    options = Options()
+    # options.add_argument('-headless')  # 无头参数，调试时可以注释掉
+
+    # 设置微信内置浏览器的User-Agent
+    options.add_argument(
+        '--user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)')
+
+    service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
+    driver = webdriver.Chrome(service=service, options=options)
+    # 打开微信公众号登录页面
+    driver.get('https://mp.weixin.qq.com/')
+    # 等待5秒钟
+    time.sleep(2)
+    # # 拿手机扫二维码！
+    logging.info("请拿手机扫码二维码登录公众号")
+    time.sleep(20)
+
+    # 重新载入公众号登录页，登录之后会显示公众号后台首页，从这个返回内容中获取cookies信息
+    driver.get('https://mp.weixin.qq.com/')
+    # 获取cookies
+    cookie_items = driver.get_cookies()
+    # 获取到的cookies是列表形式，将cookies转成json形式并存入本地名为cookie的文本中
+    for cookie_item in cookie_items:
+        cookies[cookie_item['name']] = cookie_item['value']
+
+    if "slave_sid" not in cookies:
+        logging.info("登录公众号失败，获取cookie失败")
+        exit()
+    # cookies = json.dumps(post)  # 注释掉这一行
+
+    # 方法3：使用requests库发送请求获取重定向URL
+    url = 'https://mp.weixin.qq.com'
+    response = requests.get(url=url, allow_redirects=False, cookies=cookies)
+    if 'Location' in response.headers:
+        redirect_url = response.headers.get("Location")
+        print("重定向URL:", redirect_url)
+        token_match = re.findall(r'token=(\d+)', redirect_url)
+        if token_match:
+            token = token_match[0]
+            print("获取到的token:", token)
+            logging.info("微信token:" + token)
+
+    article_urls = []
+    gzlist = [{"account_name": "长春教育八卦阵", "account_id": "jybg100"}]
+    for item in gzlist:
+        account_name = item["account_name"]
+        account_id = item["account_id"]
+        # 搜索微信公众号的接口地址
+        search_url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
+        # 搜索微信公众号接口需要传入的参数，有三个变量：微信公众号token、随机数random、搜索的微信公众号名字
+        query_id = {
+            'action': 'search_biz',
+            'token': token,
+            'lang': 'zh_CN',
+            'f': 'json',
+            'ajax': '1',
+            'random': random.random(),
+            'query': account_name,
+            'begin': '0',
+            'count': '5'
+        }
+        # 打开搜索微信公众号接口地址，需要传入相关参数信息如：cookies、params、headers
+        search_response = requests.get(search_url, cookies=cookies, headers=header, params=query_id)
+        # 取搜索结果中的第一个公众号
+        lists = search_response.json().get('list')[0]
+        # 获取这个公众号的fakeid，后面爬取公众号文章需要此字段
+        fakeid = lists.get('fakeid')
+        logging.info("fakeid:" + fakeid)
+        # 微信公众号文章接口地址
+        appmsg_url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
+        # 搜索文章需要传入几个参数：登录的公众号token、要爬取文章的公众号fakeid、随机数random
+        query_id_data = {
+            'token': token,
+            'lang': 'zh_CN',
+            'f': 'json',
+            'ajax': '1',
+            'random': random.random(),
+            'action': 'list_ex',
+            'begin': '0',  # 不同页，此参数变化，变化规则为每页加5
+            'count': '5',
+            'query': '',
+            'fakeid': fakeid,
+            'type': '9'
+        }
+        # 打开搜索的微信公众号文章列表页
+        query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data)
+        fakeid_list = query_fakeid_response.json().get('app_msg_list')
+
+        for item in fakeid_list:
+            # 采集item示例
+            new_article = {
+                'title': item.get('title'),
+                'article_url': item.get('link'),
+                'account_id': account_id,
+                'account_name': account_name,
+                'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime(
+                    '%Y-%m-%d %H:%M:%S'),
+                'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+            }
+            logging.info("new_article:", new_article)
+            article_urls.append({"title":item.get('title'),"url":item.get('link'),"publish_time":datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S')})
+            time.sleep(1)
+
+    for x in article_urls:
+        print(x)
+
+        # 关闭浏览器
+    driver.quit()
+    print("所有文章爬取完成！")
diff --git a/dsLightRag/Test/T2_GetList.py b/dsLightRag/Test/T2_GetList.py
new file mode 100644
index 00000000..6db1e6f1
--- /dev/null
+++ b/dsLightRag/Test/T2_GetList.py
@@ -0,0 +1,162 @@
+# 详解（一）Python + Selenium 批量采集微信公众号，搭建自己的微信公众号每日AI简报，告别信息焦虑
+# https://blog.csdn.net/k352733625/article/details/149222945
+
+# 微信爬爬猫---公众号文章抓取代码分析
+# https://blog.csdn.net/yajuanpi4899/article/details/121584268
+
+"""
+安装pdfkit库
+复制
+pip3 install pdfkit -i http://pypi.douban.com/simple --trusted-host pypi.douban.com
+1.
+import pdfkit
+pdfkit.from_url('公众号文章地址', 'out.pdf')
+"""
+import datetime
+import logging
+import random
+import re
+
+import requests
+
+"""
+# 查看selenium版本
+pip show selenium
+4.34.2
+
+# 查看Chrome浏览器版本
+chrome://version/
+138.0.7204.101 (正式版本) （64 位）
+
+# 下载驱动包
+https://googlechromelabs.github.io/chrome-for-testing/
+https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
+"""
+import time
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service as ChromeService
+
+if __name__ == '__main__':
+    # 定义一个空的字典，存放cookies内容
+    cookies = {}
+    # 设置headers - 使用微信内置浏览器的User-Agent
+    header = {
+        "HOST": "mp.weixin.qq.com",
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+        "Accept-Encoding": "gzip, deflate, br",
+        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4",
+        "Connection": "keep-alive"
+    }
+    # 用webdriver启动谷歌浏览器
+    logging.info("启动浏览器，打开微信公众号登录界面")
+    options = Options()
+    # options.add_argument('-headless')  # 无头参数，调试时可以注释掉
+
+    # 设置微信内置浏览器的User-Agent
+    options.add_argument(
+        '--user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)')
+
+    service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
+    driver = webdriver.Chrome(service=service, options=options)
+    # 打开微信公众号登录页面
+    driver.get('https://mp.weixin.qq.com/')
+    # 等待5秒钟
+    time.sleep(2)
+    # # 拿手机扫二维码！
+    logging.info("请拿手机扫码二维码登录公众号")
+    time.sleep(20)
+
+    # 重新载入公众号登录页，登录之后会显示公众号后台首页，从这个返回内容中获取cookies信息
+    driver.get('https://mp.weixin.qq.com/')
+    # 获取cookies
+    cookie_items = driver.get_cookies()
+    # 获取到的cookies是列表形式，将cookies转成json形式并存入本地名为cookie的文本中
+    for cookie_item in cookie_items:
+        cookies[cookie_item['name']] = cookie_item['value']
+
+    if "slave_sid" not in cookies:
+        logging.info("登录公众号失败，获取cookie失败")
+        exit()
+    # cookies = json.dumps(post)  # 注释掉这一行
+
+    # 方法3：使用requests库发送请求获取重定向URL
+    url = 'https://mp.weixin.qq.com'
+    response = requests.get(url=url, allow_redirects=False, cookies=cookies)
+    if 'Location' in response.headers:
+        redirect_url = response.headers.get("Location")
+        print("重定向URL:", redirect_url)
+        token_match = re.findall(r'token=(\d+)', redirect_url)
+        if token_match:
+            token = token_match[0]
+            print("获取到的token:", token)
+            logging.info("微信token:" + token)
+
+    article_urls = []
+    gzlist = [{"account_name": "长春教育八卦阵", "account_id": "jybg100"}]
+    for item in gzlist:
+        account_name = item["account_name"]
+        account_id = item["account_id"]
+        # 搜索微信公众号的接口地址
+        search_url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
+        # 搜索微信公众号接口需要传入的参数，有三个变量：微信公众号token、随机数random、搜索的微信公众号名字
+        query_id = {
+            'action': 'search_biz',
+            'token': token,
+            'lang': 'zh_CN',
+            'f': 'json',
+            'ajax': '1',
+            'random': random.random(),
+            'query': account_name,
+            'begin': '0',
+            'count': '5'
+        }
+        # 打开搜索微信公众号接口地址，需要传入相关参数信息如：cookies、params、headers
+        search_response = requests.get(search_url, cookies=cookies, headers=header, params=query_id)
+        # 取搜索结果中的第一个公众号
+        lists = search_response.json().get('list')[0]
+        # 获取这个公众号的fakeid，后面爬取公众号文章需要此字段
+        fakeid = lists.get('fakeid')
+        logging.info("fakeid:" + fakeid)
+        # 微信公众号文章接口地址
+        appmsg_url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
+        # 搜索文章需要传入几个参数：登录的公众号token、要爬取文章的公众号fakeid、随机数random
+        query_id_data = {
+            'token': token,
+            'lang': 'zh_CN',
+            'f': 'json',
+            'ajax': '1',
+            'random': random.random(),
+            'action': 'list_ex',
+            'begin': '0',  # 不同页，此参数变化，变化规则为每页加5
+            'count': '5',
+            'query': '',
+            'fakeid': fakeid,
+            'type': '9'
+        }
+        # 打开搜索的微信公众号文章列表页
+        query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data)
+        fakeid_list = query_fakeid_response.json().get('app_msg_list')
+
+        for item in fakeid_list:
+            # 采集item示例
+            new_article = {
+                'title': item.get('title'),
+                'article_url': item.get('link'),
+                'account_id': account_id,
+                'account_name': account_name,
+                'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime(
+                    '%Y-%m-%d %H:%M:%S'),
+                'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+            }
+            logging.info("new_article:", new_article)
+            article_urls.append({"title":item.get('title'),"url":item.get('link'),"publish_time":datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S')})
+            time.sleep(1)
+
+    for x in article_urls:
+        print(x)
+
+        # 关闭浏览器
+    driver.quit()
+    print("所有文章爬取完成！")
diff --git a/dsLightRag/Test/TestCrawl.py b/dsLightRag/Test/TestCrawl.py
deleted file mode 100644
index 63d2eb14..00000000
--- a/dsLightRag/Test/TestCrawl.py
+++ /dev/null
@@ -1,270 +0,0 @@
-# 详解（一）Python + Selenium 批量采集微信公众号，搭建自己的微信公众号每日AI简报，告别信息焦虑
-# https://blog.csdn.net/k352733625/article/details/149222945
-
-# 微信爬爬猫---公众号文章抓取代码分析
-# https://blog.csdn.net/yajuanpi4899/article/details/121584268
-import datetime
-import logging
-import random
-import re
-import os
-
-import requests
-
-"""
-# 查看selenium版本
-pip show selenium
-4.34.2
-
-# 查看Chrome浏览器版本
-chrome://version/
-138.0.7204.101 (正式版本) （64 位）
-
-# 下载驱动包
-https://googlechromelabs.github.io/chrome-for-testing/
-https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
-"""
-import time
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.chrome.service import Service as ChromeService
-import json
-
-if __name__ == '__main__':
-    # 定义一个空的字典，存放cookies内容
-    cookies = {}
-    # 设置headers - 使用微信内置浏览器的User-Agent
-    header = {
-        "HOST": "mp.weixin.qq.com",
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)",
-        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
-        "Accept-Encoding": "gzip, deflate, br",
-        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4",
-        "Connection": "keep-alive"
-    }
-    # 用webdriver启动谷歌浏览器
-    logging.info("启动浏览器，打开微信公众号登录界面")
-    options = Options()
-    # options.add_argument('-headless')  # 无头参数，调试时可以注释掉
-    
-    # 设置微信内置浏览器的User-Agent
-    options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)')
-    
-    service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
-    driver = webdriver.Chrome(service=service, options=options)
-    # 打开微信公众号登录页面
-    driver.get('https://mp.weixin.qq.com/')
-    # 等待5秒钟
-    time.sleep(2)
-    # # 拿手机扫二维码！
-    logging.info("请拿手机扫码二维码登录公众号")
-    time.sleep(20)
-
-    # 重新载入公众号登录页，登录之后会显示公众号后台首页，从这个返回内容中获取cookies信息
-    driver.get('https://mp.weixin.qq.com/')
-    # 获取cookies
-    cookie_items = driver.get_cookies()
-    # 获取到的cookies是列表形式，将cookies转成json形式并存入本地名为cookie的文本中
-    for cookie_item in cookie_items:
-        cookies[cookie_item['name']] = cookie_item['value']
-
-    if "slave_sid" not in cookies:
-        logging.info("登录公众号失败，获取cookie失败")
-        exit()
-    # cookies = json.dumps(post)  # 注释掉这一行
-
-    # 方法3：使用requests库发送请求获取重定向URL
-    url = 'https://mp.weixin.qq.com'
-    response = requests.get(url=url, allow_redirects=False, cookies=cookies)
-    if 'Location' in response.headers:
-        redirect_url = response.headers.get("Location")
-        print("重定向URL:", redirect_url)
-        token_match = re.findall(r'token=(\d+)', redirect_url)
-        if token_match:
-            token = token_match[0]
-            print("获取到的token:", token)
-            logging.info("微信token:" + token)
-
-    article_urls = []
-    gzlist = [{"account_name": "长春教育八卦阵", "account_id": "jybg100"}]
-    for item in gzlist:
-        account_name = item["account_name"]
-        account_id = item["account_id"]
-        # 搜索微信公众号的接口地址
-        search_url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
-        # 搜索微信公众号接口需要传入的参数，有三个变量：微信公众号token、随机数random、搜索的微信公众号名字
-        query_id = {
-            'action': 'search_biz',
-            'token': token,
-            'lang': 'zh_CN',
-            'f': 'json',
-            'ajax': '1',
-            'random': random.random(),
-            'query': account_name,
-            'begin': '0',
-            'count': '5'
-        }
-        # 打开搜索微信公众号接口地址，需要传入相关参数信息如：cookies、params、headers
-        search_response = requests.get(search_url, cookies=cookies, headers=header, params=query_id)
-        # 取搜索结果中的第一个公众号
-        lists = search_response.json().get('list')[0]
-        # 获取这个公众号的fakeid，后面爬取公众号文章需要此字段
-        fakeid = lists.get('fakeid')
-        logging.info("fakeid:" + fakeid)
-        # 微信公众号文章接口地址
-        appmsg_url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
-        # 搜索文章需要传入几个参数：登录的公众号token、要爬取文章的公众号fakeid、随机数random
-        query_id_data = {
-            'token': token,
-            'lang': 'zh_CN',
-            'f': 'json',
-            'ajax': '1',
-            'random': random.random(),
-            'action': 'list_ex',
-            'begin': '0',  # 不同页，此参数变化，变化规则为每页加5
-            'count': '5',
-            'query': '',
-            'fakeid': fakeid,
-            'type': '9'
-        }
-        # 打开搜索的微信公众号文章列表页
-        query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data)
-        fakeid_list = query_fakeid_response.json().get('app_msg_list')
-
-        for item in fakeid_list:
-            # 采集item示例
-            new_article = {
-                'title': item.get('title'),
-                'article_url': item.get('link'),
-                'account_id': account_id,
-                'account_name': account_name,
-                'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S'),
-                'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-            }
-            print("new_article:", new_article)
-            logging.info("new_article:", new_article)
-            article_urls.append(item.get('link'))
-            time.sleep(1)
-
-    # 确保Logs目录存在
-    logs_dir = "./Test/Logs"
-    if not os.path.exists(logs_dir):
-        os.makedirs(logs_dir)
-
-    for article_url in article_urls:
-        print("正在爬取文章：" + article_url)
-        try:
-            # 使用requests直接获取文章内容，模拟微信环境
-            wechat_headers = {
-                "User-Agent": "Mozilla/5.0 (Linux; Android 10; MI 8 Build/QKQ1.190828.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/83.0.4103.101 Mobile Safari/537.36 XWEB/1768 MMWEBSDK/20210302 MMWEBID/6253 MicroMessenger/8.0.2.1860(0x28000234) Process/toolsmp WeChat/arm64 Weixin NetType/WIFI Language/zh_CN ABI/arm64",
-                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
-                "Accept-Encoding": "gzip, deflate",
-                "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
-                "X-Requested-With": "com.tencent.mm",
-                "Referer": "https://mp.weixin.qq.com/"
-            }
-            
-            # 使用selenium打开文章链接，设置请求头
-            driver.execute_cdp_cmd('Network.setExtraHTTPHeaders', {'headers': wechat_headers})
-            driver.get(article_url)
-            # 增加等待时间，确保页面完全加载
-            time.sleep(5)
-            
-            # 检查是否需要登录
-            if "请在微信客户端中打开链接" in driver.page_source or "请在微信中打开此链接" in driver.page_source:
-                print(f"文章需要在微信中打开，尝试使用requests直接获取：{article_url}")
-                # 尝试使用requests直接获取
-                response = requests.get(article_url, headers=wechat_headers, cookies=cookies)
-                if "请在微信客户端中打开链接" in response.text or "请在微信中打开此链接" in response.text:
-                    print(f"使用requests仍然无法获取，跳过此文章：{article_url}")
-                    continue
-                else:
-                    # 保存获取到的HTML内容
-                    filename = f"article_{article_url.split('sn=')[1][:10] if 'sn=' in article_url else 'unknown'}"
-                    save_path = f"{logs_dir}/{filename}.html"
-                    with open(save_path, "w", encoding="utf-8") as f:
-                        f.write(response.text)
-                    print(f"已保存文章HTML内容：{save_path}")
-                    continue
-                
-            # 使用更可靠的选择器查找标题和内容
-            try:
-                # 尝试多种可能的标题选择器
-                title_selectors = [
-                    '//h1[@class="rich_media_title"]', 
-                    '//h1[@id="activity-name"]',
-                    '//h2[@class="rich_media_title"]',
-                    '//div[@class="rich_media_content"]//h1',
-                    '//div[@id="js_article"]//h1'
-                ]
-                
-                title = None
-                for selector in title_selectors:
-                    try:
-                        title_element = driver.find_element('xpath', selector)
-                        title = title_element.text.strip()
-                        if title:
-                            break
-                    except:
-                        continue
-                
-                if not title:
-                    # 如果所有选择器都失败，尝试从页面标题获取
-                    title = driver.title.replace(" - 微信公众号", "").strip()
-                
-                # 尝试多种可能的内容选择器
-                content_selectors = [
-                    '//div[@class="rich_media_content"]',
-                    '//div[@id="js_content"]',
-                    '//div[@class="rich_media_wrp"]'
-                ]
-                
-                content = None
-                for selector in content_selectors:
-                    try:
-                        content_element = driver.find_element('xpath', selector)
-                        content = content_element.text.strip()
-                        if content:
-                            break
-                    except:
-                        continue
-                
-                if not content:
-                    # 如果无法获取内容，至少保存页面源码
-                    content = "无法提取正文内容，保存页面源码：\n" + driver.page_source
-                
-                # 创建文件名（使用标题，但去除不合法的文件名字符）
-                if not title:
-                    title = "未知标题_" + article_url.split("sn=")[1][:10] if "sn=" in article_url else "未知标题"
-                    
-                filename = re.sub(r'[\\/:*?"<>|]', '_', title)
-                
-                # 保存文章内容到文件
-                save_path = f"{logs_dir}/{filename}.txt"
-                with open(save_path, "w", encoding="utf-8") as f:
-                    f.write(f"标题：{title}\n\n")
-                    f.write(f"链接：{article_url}\n\n")
-                    f.write(f"内容：\n{content}")
-                    
-                print(f"文章《{title}》保存成功：{save_path}")
-                
-            except Exception as e:
-                print(f"提取文章内容失败：{str(e)}")
-                # 保存页面源码以便分析
-                error_filename = "error_" + article_url.split("sn=")[1][:10] if "sn=" in article_url else "error_page"
-                error_path = f"{logs_dir}/{error_filename}.html"
-                with open(error_path, "w", encoding="utf-8") as f:
-                    f.write(driver.page_source)
-                print(f"已保存页面源码到：{error_path}")
-            
-            # 避免频繁请求被封
-            time.sleep(random.uniform(3, 7))
-            
-        except Exception as e:
-            print(f"爬取文章失败：{article_url}，错误信息：{str(e)}")
-            continue
-    
-    # 关闭浏览器
-    driver.quit()
-    print("所有文章爬取完成！")
-

From d57b2b94c566b734e3c32c455b22252237ce3eef Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 09:09:08 +0800
Subject: [PATCH 09/46] 'commit'

---
 dsLightRag/Test/T1_Login.py      | 98 +++-----------------------------
 dsLightRag/Test/T2_GetList.py    | 45 +++++----------
 dsLightRag/Test/article_urls.txt |  0
 dsLightRag/Test/cookies.txt      |  1 +
 4 files changed, 22 insertions(+), 122 deletions(-)
 create mode 100644 dsLightRag/Test/article_urls.txt
 create mode 100644 dsLightRag/Test/cookies.txt

diff --git a/dsLightRag/Test/T1_Login.py b/dsLightRag/Test/T1_Login.py
index 6db1e6f1..8c4c57e0 100644
--- a/dsLightRag/Test/T1_Login.py
+++ b/dsLightRag/Test/T1_Login.py
@@ -4,15 +4,8 @@
 # 微信爬爬猫---公众号文章抓取代码分析
 # https://blog.csdn.net/yajuanpi4899/article/details/121584268
 
-"""
-安装pdfkit库
-复制
-pip3 install pdfkit -i http://pypi.douban.com/simple --trusted-host pypi.douban.com
-1.
-import pdfkit
-pdfkit.from_url('公众号文章地址', 'out.pdf')
-"""
 import datetime
+import json
 import logging
 import random
 import re
@@ -52,11 +45,6 @@ if __name__ == '__main__':
     # 用webdriver启动谷歌浏览器
     logging.info("启动浏览器，打开微信公众号登录界面")
     options = Options()
-    # options.add_argument('-headless')  # 无头参数，调试时可以注释掉
-
-    # 设置微信内置浏览器的User-Agent
-    options.add_argument(
-        '--user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)')
 
     service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
     driver = webdriver.Chrome(service=service, options=options)
@@ -81,82 +69,10 @@ if __name__ == '__main__':
         exit()
     # cookies = json.dumps(post)  # 注释掉这一行
 
-    # 方法3：使用requests库发送请求获取重定向URL
-    url = 'https://mp.weixin.qq.com'
-    response = requests.get(url=url, allow_redirects=False, cookies=cookies)
-    if 'Location' in response.headers:
-        redirect_url = response.headers.get("Location")
-        print("重定向URL:", redirect_url)
-        token_match = re.findall(r'token=(\d+)', redirect_url)
-        if token_match:
-            token = token_match[0]
-            print("获取到的token:", token)
-            logging.info("微信token:" + token)
-
-    article_urls = []
-    gzlist = [{"account_name": "长春教育八卦阵", "account_id": "jybg100"}]
-    for item in gzlist:
-        account_name = item["account_name"]
-        account_id = item["account_id"]
-        # 搜索微信公众号的接口地址
-        search_url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
-        # 搜索微信公众号接口需要传入的参数，有三个变量：微信公众号token、随机数random、搜索的微信公众号名字
-        query_id = {
-            'action': 'search_biz',
-            'token': token,
-            'lang': 'zh_CN',
-            'f': 'json',
-            'ajax': '1',
-            'random': random.random(),
-            'query': account_name,
-            'begin': '0',
-            'count': '5'
-        }
-        # 打开搜索微信公众号接口地址，需要传入相关参数信息如：cookies、params、headers
-        search_response = requests.get(search_url, cookies=cookies, headers=header, params=query_id)
-        # 取搜索结果中的第一个公众号
-        lists = search_response.json().get('list')[0]
-        # 获取这个公众号的fakeid，后面爬取公众号文章需要此字段
-        fakeid = lists.get('fakeid')
-        logging.info("fakeid:" + fakeid)
-        # 微信公众号文章接口地址
-        appmsg_url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
-        # 搜索文章需要传入几个参数：登录的公众号token、要爬取文章的公众号fakeid、随机数random
-        query_id_data = {
-            'token': token,
-            'lang': 'zh_CN',
-            'f': 'json',
-            'ajax': '1',
-            'random': random.random(),
-            'action': 'list_ex',
-            'begin': '0',  # 不同页，此参数变化，变化规则为每页加5
-            'count': '5',
-            'query': '',
-            'fakeid': fakeid,
-            'type': '9'
-        }
-        # 打开搜索的微信公众号文章列表页
-        query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data)
-        fakeid_list = query_fakeid_response.json().get('app_msg_list')
-
-        for item in fakeid_list:
-            # 采集item示例
-            new_article = {
-                'title': item.get('title'),
-                'article_url': item.get('link'),
-                'account_id': account_id,
-                'account_name': account_name,
-                'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime(
-                    '%Y-%m-%d %H:%M:%S'),
-                'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-            }
-            logging.info("new_article:", new_article)
-            article_urls.append({"title":item.get('title'),"url":item.get('link'),"publish_time":datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S')})
-            time.sleep(1)
-
-    for x in article_urls:
-        print(x)
-
-        # 关闭浏览器
+    # 将cookies写入文件
+    with open('cookies.txt', mode='w', encoding="utf-8") as f:
+        f.write(json.dumps(cookies))
+    # 关闭浏览器
     driver.quit()
-    print("所有文章爬取完成！")
+    # 输出提示
+    print("成功获取了cookies内容！")
diff --git a/dsLightRag/Test/T2_GetList.py b/dsLightRag/Test/T2_GetList.py
index 6db1e6f1..d49c40b9 100644
--- a/dsLightRag/Test/T2_GetList.py
+++ b/dsLightRag/Test/T2_GetList.py
@@ -13,6 +13,7 @@ import pdfkit
 pdfkit.from_url('公众号文章地址', 'out.pdf')
 """
 import datetime
+import json
 import logging
 import random
 import re
@@ -38,8 +39,12 @@ from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service as ChromeService
 
 if __name__ == '__main__':
-    # 定义一个空的字典，存放cookies内容
-    cookies = {}
+    # 从文件cookies.txt中获取
+    with open('cookies.txt', 'r', encoding='utf-8') as f:
+        content = f.read()
+    # 使用json还原为json对象
+    cookies = json.loads(content)
+    options = Options()
     # 设置headers - 使用微信内置浏览器的User-Agent
     header = {
         "HOST": "mp.weixin.qq.com",
@@ -49,37 +54,9 @@ if __name__ == '__main__':
         "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4",
         "Connection": "keep-alive"
     }
-    # 用webdriver启动谷歌浏览器
-    logging.info("启动浏览器，打开微信公众号登录界面")
-    options = Options()
-    # options.add_argument('-headless')  # 无头参数，调试时可以注释掉
-
-    # 设置微信内置浏览器的User-Agent
-    options.add_argument(
-        '--user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)')
 
     service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
     driver = webdriver.Chrome(service=service, options=options)
-    # 打开微信公众号登录页面
-    driver.get('https://mp.weixin.qq.com/')
-    # 等待5秒钟
-    time.sleep(2)
-    # # 拿手机扫二维码！
-    logging.info("请拿手机扫码二维码登录公众号")
-    time.sleep(20)
-
-    # 重新载入公众号登录页，登录之后会显示公众号后台首页，从这个返回内容中获取cookies信息
-    driver.get('https://mp.weixin.qq.com/')
-    # 获取cookies
-    cookie_items = driver.get_cookies()
-    # 获取到的cookies是列表形式，将cookies转成json形式并存入本地名为cookie的文本中
-    for cookie_item in cookie_items:
-        cookies[cookie_item['name']] = cookie_item['value']
-
-    if "slave_sid" not in cookies:
-        logging.info("登录公众号失败，获取cookie失败")
-        exit()
-    # cookies = json.dumps(post)  # 注释掉这一行
 
     # 方法3：使用requests库发送请求获取重定向URL
     url = 'https://mp.weixin.qq.com'
@@ -151,11 +128,17 @@ if __name__ == '__main__':
                 'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
             }
             logging.info("new_article:", new_article)
-            article_urls.append({"title":item.get('title'),"url":item.get('link'),"publish_time":datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S')})
+            article_urls.append({"title": item.get('title'), "url": item.get('link'),
+                                 "publish_time": datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime(
+                                     '%Y-%m-%d %H:%M:%S')})
             time.sleep(1)
 
     for x in article_urls:
         print(x)
+    # 将返回的地址写入到文件
+    with open('article_urls.txt', 'w', encoding='utf-8') as f:
+        for url in article_urls:
+            f.write(url + '\n')
 
         # 关闭浏览器
     driver.quit()
diff --git a/dsLightRag/Test/article_urls.txt b/dsLightRag/Test/article_urls.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/dsLightRag/Test/cookies.txt b/dsLightRag/Test/cookies.txt
new file mode 100644
index 00000000..4999d643
--- /dev/null
+++ b/dsLightRag/Test/cookies.txt
@@ -0,0 +1 @@
+{"_clsk": "1v8cz8t|1752541383487|1|1|mp.weixin.qq.com/weheat-agent/payload/record", "xid": "fff1911b542cde79c5c47a38cb3929c8", "data_bizuin": "3514353238", "slave_user": "gh_4f88a4e194da", "slave_sid": "cDlUaWlaek5RZHV6SUIyVWNNZlJGYTJQdHY5YzUyN29LMG94RlptUV9lbkVDUWxmaTBURFE5YWNKeVRkYlZSdU9VRnNjWXRKN2xfZ2pZd0JWal82aVpsRDhqUnJXQkdYMml4SlhrdGtGY2k2MG95YTlQVEFVanpIR01oZ3p4dldiME9hRE1zcGxZV0FlNTVV", "rand_info": "CAESIPFuk5/nui6QoQ6zEO2B5RfaUmjuQjTJOQVg9mBuI/XG", "data_ticket": "AIy4PwNlFMRBDHcZ7jcXDXf/8fFLl5NS25Nj3tYuDL8H4W8EiURU4G9Dakn7aSUC", "bizuin": "3514353238", "mm_lang": "zh_CN", "slave_bizuin": "3514353238", "uuid": "91eaae9bc5e4f725e03ee2b7e75c8a2c", "ua_id": "bbkG1LsuVI1DszGdAAAAADm2HzejXloc87mSyGEMpdY=", "wxuin": "52541365079710", "_clck": "1l32fbr|1|fxm|0"}
\ No newline at end of file

From 8148f2a87d0463bf66a3577504b400db7b19e8e2 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 09:11:53 +0800
Subject: [PATCH 10/46] 'commit'

---
 dsLightRag/Test/T2_GetList.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/dsLightRag/Test/T2_GetList.py b/dsLightRag/Test/T2_GetList.py
index d49c40b9..53fe831a 100644
--- a/dsLightRag/Test/T2_GetList.py
+++ b/dsLightRag/Test/T2_GetList.py
@@ -45,6 +45,7 @@ if __name__ == '__main__':
     # 使用json还原为json对象
     cookies = json.loads(content)
     options = Options()
+    options.add_argument('-headless')  # 无头参数，调试时可以注释掉
     # 设置headers - 使用微信内置浏览器的User-Agent
     header = {
         "HOST": "mp.weixin.qq.com",
@@ -137,8 +138,8 @@ if __name__ == '__main__':
         print(x)
     # 将返回的地址写入到文件
     with open('article_urls.txt', 'w', encoding='utf-8') as f:
-        for url in article_urls:
-            f.write(url + '\n')
+        for record in article_urls:
+            f.write(record['title']+" "+record['publish_time']+" "+record['url'] + '\n')
 
         # 关闭浏览器
     driver.quit()

From a3227c1967c44e528ab0f426860e924e6ed18e4d Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 09:24:10 +0800
Subject: [PATCH 11/46] 'commit'

---
 dsLightRag/Doc/2、Conda维护.txt              |  10 +++++-----
 .../{T2_GetList.py => T2_GetArticleList.py}     |   9 +--------
 dsLightRag/Test/T3_GetArticle.py                |  15 +++++++++++++++
 dsLightRag/Test/article_urls.txt                |  11 +++++++++++
 dsLightRag/Test/out.pdf                         | Bin 0 -> 19544 bytes
 5 files changed, 32 insertions(+), 13 deletions(-)
 rename dsLightRag/Test/{T2_GetList.py => T2_GetArticleList.py} (94%)
 create mode 100644 dsLightRag/Test/T3_GetArticle.py
 create mode 100644 dsLightRag/Test/out.pdf

diff --git a/dsLightRag/Doc/2、Conda维护.txt b/dsLightRag/Doc/2、Conda维护.txt
index 021764bf..80bfef40 100644
--- a/dsLightRag/Doc/2、Conda维护.txt
+++ b/dsLightRag/Doc/2、Conda维护.txt
@@ -5,7 +5,7 @@ conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/f
 conda config --set show_channel_urls yes
 
 # 创建虚拟环境
-conda create -n rag python=3.10
+conda create -n py310 python=3.10
 
 # 查看当前存在哪些虚拟环境
 conda env list 
@@ -15,16 +15,16 @@ conda info -e
 conda list
 
 # 激活虚拟环境
-conda activate rag
+conda activate py310
 
 # 对虚拟环境中安装额外的包
-conda install -n rag $package_name 
+conda install -n py310 $package_name
 
 # 删除虚拟环境
-conda remove -n rag --all
+conda remove -n py310 --all
 
 # 删除环境中的某个包
-conda remove --name rag  $package_name 
+conda remove --name py310  $package_name
 
 # 恢复默认镜像
 conda config --remove-key channels
diff --git a/dsLightRag/Test/T2_GetList.py b/dsLightRag/Test/T2_GetArticleList.py
similarity index 94%
rename from dsLightRag/Test/T2_GetList.py
rename to dsLightRag/Test/T2_GetArticleList.py
index 53fe831a..bbf4318f 100644
--- a/dsLightRag/Test/T2_GetList.py
+++ b/dsLightRag/Test/T2_GetArticleList.py
@@ -4,14 +4,7 @@
 # 微信爬爬猫---公众号文章抓取代码分析
 # https://blog.csdn.net/yajuanpi4899/article/details/121584268
 
-"""
-安装pdfkit库
-复制
-pip3 install pdfkit -i http://pypi.douban.com/simple --trusted-host pypi.douban.com
-1.
-import pdfkit
-pdfkit.from_url('公众号文章地址', 'out.pdf')
-"""
+
 import datetime
 import json
 import logging
diff --git a/dsLightRag/Test/T3_GetArticle.py b/dsLightRag/Test/T3_GetArticle.py
new file mode 100644
index 00000000..02e9cce8
--- /dev/null
+++ b/dsLightRag/Test/T3_GetArticle.py
@@ -0,0 +1,15 @@
+"""
+安装pdfkit库
+https://github.com/JazzCore/python-pdfkit/wiki/Installing-wkhtmltopdf
+
+我是在Windows上开发的，所以，下载的是：【注意要科学上网下载，否则太慢了~】
+https://release-assets.githubusercontent.com/github-production-release-asset/131323182/3200f380-aba8-11ea-8942-42fa5e27a312?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-07-15T02%3A10%3A32Z&rscd=attachment%3B+filename%3Dwkhtmltox-0.12.6-1.mxe-cross-win64.7z&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-07-15T01%3A10%3A07Z&ske=2025-07-15T02%3A10%3A32Z&sks=b&skv=2018-11-09&sig=IYNB2Gi%2FZ9tZfPXmo7PbqjbxmcLULpP%2Bex2z6lp2DvE%3D&jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc1MjU0MjU3NSwibmJmIjoxNzUyNTQyMjc1LCJwYXRoIjoicmVsZWFzZWFzc2V0cHJvZHVjdGlvbi5ibG9iLmNvcmUud2luZG93cy5uZXQifQ.LyZXiO_mRK2qX99CTJtVwypU4DLsK-_Js0wspzsL0Y4&response-content-disposition=attachment%3B%20filename%3Dwkhtmltox-0.12.6-1.mxe-cross-win64.7z&response-content-type=application%2Foctet-stream
+解压到D:\wkhtmltox中，还要注意把路径加到环境变量中
+
+conda activate py310
+pip3 install pdfkit
+"""
+import pdfkit
+path_wk = r'D:\wkhtmltox\bin\wkhtmltopdf.exe' #wkhtmltopdf安装位置
+config = pdfkit.configuration(wkhtmltopdf = path_wk)
+pdfkit.from_url('http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd', 'out.pdf',configuration=config)
\ No newline at end of file
diff --git a/dsLightRag/Test/article_urls.txt b/dsLightRag/Test/article_urls.txt
index e69de29b..c8d57b34 100644
--- a/dsLightRag/Test/article_urls.txt
+++ b/dsLightRag/Test/article_urls.txt
@@ -0,0 +1,11 @@
+长春中考上演“神仙打架”！省二力旺等五校过半考生超700分！ 2025-07-14 18:36:34 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd
+独家专访赫行学校2025年中考“双黄蛋”！学霸靠啥杀出重围？ 2025-07-14 18:36:34 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=2&sn=c7733f7c2c6331e51e55af695f99a43e&chksm=84e1ab0cb396221a7d185dcb99acc9dce45cc5c66c3eef42680a215b710bb9bfa9fd10da4419#rd
+长春40所学校中考成绩曝光！700+成批涌现！谁是最大黑马？ 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=1&sn=1f314640ae6eec236b0e16271bd44362&chksm=84e1ab1db396220b73ae08898a026d887436501a6c42abe01d7fa4aef9063533fad89720d3b8#rd
+喜报！长春外国语学校女子篮球队夺得冠军！ 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=2&sn=31651043acb6ecbf4232e92e635196b6&chksm=84e1ab1db396220b0810c3bdf332128b110d1902658f2556eaeff67cec084a8a068a5ae9a275#rd
+“趣闯盛夏·探无界”！探秘一实验银河小学夏令营 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=3&sn=8edf6ce8cebdaad55343b39639876c27&chksm=84e1ab1db396220b26b172b3b565f919f7ded4c2a5b78227294ea29a558a7666c33b8c1de660#rd
+刚刚！2025年长春中考各批次控制线公布！ 2025-07-12 10:04:32 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526196&idx=1&sn=282e5e824410a9a92a83dd800cb58a7c&chksm=84e1aba6b39622b03fe6422032474c9696f83541d9ff9b8b6a9f0f099ce459da430f720d05e4#rd
+重磅消息！师大附属实验学校（经开）校长有新任命！ 2025-07-12 10:04:32 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526196&idx=2&sn=9449c87935faf86ddcc5a674ea888913&chksm=84e1aba6b39622b03fd8413ff1e74b61f662ec8deb3887c2c5b8e5ad15470b15ae14b21e94ea#rd
+市教育局最新发布！长春2025年中考成绩将于7月12日公布！ 2025-07-11 15:22:13 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526176&idx=1&sn=a5b4104d2fe74ace32ab31faf5f1c44c&chksm=84e1abb2b39622a40b0e0969e84fb00c753cc8ffefb8624726afa2a7352ea725c7f967bf25f5#rd
+长春市第十九中学2025年职称评聘拟通过人员名单的公示！有你认识的老师吗？ 2025-07-11 15:22:13 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526176&idx=2&sn=693d13964e4be18718c0eb4fd13ba68f&chksm=84e1abb2b39622a442a9f7cea8ddc72820050b2896968f2d0ae283c7caca2dbe014a721feb2e#rd
+高分喜报频传！长春这所小学靠啥成为“学霸制造机”？ 2025-07-10 19:00:00 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526158&idx=1&sn=a0af7f484d6a3300a9b7f3d787a2594d&chksm=84e1ab9cb396228a56420696eb09071ff829d58e8e31bd652f849f3cbd0ee276b0baad7a1e89#rd
+蝉联冠军！吉大尚德游泳队斩获骄人成绩！ 2025-07-10 19:00:00 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526158&idx=2&sn=cabb682e99978bf2a58ff0e9e06dc53d&chksm=84e1ab9cb396228a5cd457cd7ee0728491e6b3dc34fbde02240624364cfa8a9e2c533052d2b4#rd
diff --git a/dsLightRag/Test/out.pdf b/dsLightRag/Test/out.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..4aec06ad8c5bbf081626dcc93f2bfe0877c5ab8b
GIT binary patch
literal 19544
zcmd6P2{ct*__vgJRfbGu$dwSno$poV;hM>mDKbmuIdhq(gb*^6nauML8VnH$WeCZX
z$`pwxeCJ%0(*OJSertVez1F(UJ<r+C-p{l5Z~ykW=Xv&FJ1>Fbf$<_qVGt<9#_$TM
zfB-3<nz^H;3FP#~CMPMMxV?#iqm4ayM&LqVL2#MCk-&<;68LXJU`t?3U`hZXfD-T$
zzzE>Le>9+R-rmO8$;f0&MHwgvAwU9`u)qhiQ=z$ywS<8q(1yfWI24Y8VqhpJKOBL;
za6_>iP$&l{sgMw<iM26k5n{JxuC^u+KFO<&QmT%CJ}IB7D8$j;3H+h}f$~D(q<lsO
z5Ew5MFkE!&C@P~0F*UGsz?U&_uz>I>+E|<XtRA^ty$;`b14l;_dus@vgo(4ck%@|w
znC^}Zm|Zpi4FD}6F;Em13r9e<T7v8{82fu+I6oGO;D^IdTZN&#y97TC;omU?3gOe#
z(uVLOcrhp__y>WIVqpm0ofGH}vtZC&O{wAgM%3Ec#u4ZaCqqa4xxBfx1u373iIF2j
z2Zn@gR|$b&FeqN2<MFk_p~#<dEM`ZpOUfr^V{Z(Mqz>2)&@7)Q1dorcmW&*MVFa3j
z@TsfFkb?gpr>__|8#ox*o7*~`b+$1#zIYmXk@FmWG=DmZ+GF&212bTF&Kua9SUW=C
zz-1MnS8c2vfl~(v41^{iP%&|^ak4iu0VE(K014h#;1nD};8elH*xW$O=IWNYD1H<K
zi-ha$8W_CZ4#>rAEN$#nZ4Hb}@IIXvSG9#~R|J0j2?D?rRa>(ift0<0D_G@E5aQLP
zR1F~8R&7BO0|7AuShaPEg@C9~!E5f|4CqR*A{^i-z78B3g8orRQ_1iOIA~jies&(-
zS<nX^2!8ZcAV1Ntdk)K+SerRshQPLa6K8G-U>m}RvjnD%go%-ju?g6?gCj7#tw^sX
zHpMDfFu*7R=GSP>5}&caU1B}$f7OD{?esCeB?>o^JDF_#_gJoPtjqSEy?>eURzO*K
zv5DXbb5lK;v?TALi*55O_HV4b+6*jfZ@l2b>gvH8%upgK<2Ew-%_H{x!RK5#dJU86
z$*oS*Qtk6;L%&sV(Qy&gDfIA@4xX;KbHh7`&usC+1ZVlle99yA{lu@9RSyIT$i7^8
zk{@^ba8oMX=a}2LXB*nI-?Yrvk_eb459iMmPSi5?yAO<zG=4W@c^YKU2p>DP5auU%
z?;i?8a9ARuL_ghvW8<NGY*?N)VFG0)8Pw@o&VG3&{f`71fj3FRppWj795g*S#Rs|g
zIJLh->CUQ^GqYb>ZD;22)EG<nTz=iyf?!JBTFZ=F!8fY2QIo=L$s5+oEp_x(o4)Vd
z3va!fENgOBy>Pc&WSCHYx|uzA_LybMU2_G?LyVtQk)<5$3p^yV6jyl0$|$5zUR54n
zX-l3<guDn4$;mwDfA;9f^xA_2YTDUI)sD*GG{PeE`3?lzesq;Qs~fH^6&bH&eNfqs
zA%<A|!X3N0$aDFYhozH^C%ZD@9)>T$>O;t!glI-cNx8J4%?2G*l4Z^5SJYkeF4K%a
zxb!AVJs)SyaNmqkY58)Zd)>ox`MZj;UT$AXb_=JXMub|ESvZ-_<-8eHrk19%oQ)+9
zZlirZl`MC(g7k<>K6IbwMd=gPaAa+=E#H}QaI08lQ}&WgVQ^hQtv-;otLy#n0j}f}
z-<Nmsvfkmdxc<p&a<~gTu$tI90h-uXp1AAf7ehzR(F>S@?75AwZ7#>d5kxPFpMQs2
z0b1O(^x&!G|IP<nlVmp^Y|X77d;l`(U-7|irvCrM2jCR{A7(sw>3^K(KbC`C6CCt>
zhrf0v_%_Y_Zxg)7g<Mfhn(x937c$=S(gEGiA6-1?_Ac`v#a#lQnx}*~TDS8RKK}P$
z(fJXH&(yQeKc#s}Lvrd+HDt{vY5iKBJrOGrALO}8P&g53rU=b->IwUJhJ@+shoML!
zdxwbXfP?I&Zl<vTv9uy~`yF|u?G9>G#fqJHiWWSye?Kd$5v!4SQC+r&?x}ncv!kO(
zu{jNr2|@y&Ad&DtfMjR2g`rS8Ac5~)>He=Ef$v!~fTbFbitR-M{<n(;UT>$<|6?G*
zJF&GG{8$C>p6}SZ4HEcYt^zj{i%AJ#hlfAY#QRTY+;Fj)BJ=S*e-<0$n|L>fgO=EP
z<mI$&&1Ko{l@o;>78)O}Wf51f4BKV9NQ<;NBWYrL+KI;1n%*Nd!k&;EM@R(05%TjY
z{UmOwkII*bT-+&dzEV5Ps%fRZD&itm=#XnZaQ}_PyhN6bcDq?W@23kDoKnVz8G7gI
zQXar2w2F;v<l2Q(*V}GV>iOLM6HxGOfv&<(KiCBRFWCgXhfUzWvI+ce*#xh*)A9c`
zp!|VN@SgA3y91QJWD@`=WRD>f!}HwMXwEuc6@62v9hBuTS@WZp5=PprcAr*=o}JfS
zddf6kCs5T_FizP?Zt3}8{JahGb-ykoU2M-8B6G>6%UbGTPY(P;b%4Z`J#Iv@q2qn<
zQA~AVm9X)GnnDxwx&dn&aUO!{c683P<b3}?UJr)n-JzKaSGfoM*rHTqnn0i^4@E-K
zl6Ya8LCf622e0n)b`$a2ODXUll!AutPzvH-QVL=Zr67K#6vW?B3SMug=l^3!!8-xk
zrt@>%!F#@A?>15pe@!WZhaN)#q-YGCLDtE2d$b=vKzz^GmV!9vt~B>S;?4Tt4ge{S
zb>Fx@n9!>Yml6;zI<gQhUMGIwXhm9eQKBiQGzS;1CD)LVT{`Y=pit+5&$Y)YjteS}
zv9Hjq1fEIG%{j(iWGT6NtyaE;&7>Q5tn-aYQoT9sl78i5odI^n$Y@zEule<f8>VJ{
zsNU(LpKo4nSTrK%^Jf={-HQ~wU!bqBT}b(toPyZHDTrS=1@X6>g4f&W`TrVH{=g}C
z&v)$ILCXINrw|oT#~qhvlqd1^wS%I%;(DbSzk8DJ=9C_gQ^ae=&(BAQkXlS!)Kn~`
zE~BDJQ&oeJQ5m;Wuq?Ji8{`b0y$`0x3~3B8YYM2V!sPX1XeRfY#D&Y{8ux|1;hxvL
zJyI6OSEMtVZ%L&nn=f<A4Ma+pL@MHqYVfL5*`-z3<3m1bA~*hkls|9^@?UZaau25<
zf8`Y9-*O6GZ>Q(~V@Sa}0ot~UQ}CYe*t?As<X>}2EpVGcF|)#5LCaftCr%?ttcXI4
z^iXjG74<@+{^<OjtDK4q!$o)E4Wg2~V#uz#=`(jT1v&>FIF*-~Q<QAYE6qU<P>KN~
z?r9v0*crr=C0>68QKjfAUe8Cv^+7GBxdD`Cbd-hlb>BNF#>aw`LIY3=D#f7oq0aD6
zN=b?gKVY$Z(P)EmR>Ldo6QQ@Ui0@vY;N1dU-F2IS{Fju1+(RkIUnvFox0Hg{+v)iK
z8c_Z~DR|F!?A-y%UsK92i_dHP;={7F_yp1T`g)I~|GxMXe_6OLUgvq>XlB;d;=|2_
zdj>2%IO#Zu$Wz3l@4RbPIKJqrR?nt&me8y#Tmh7a8I*<I3iS*|!JV`(w>&r9S$y0F
z*w28APx7+YV)N-7|Jmp}hIphL{&diXPbBOQNcjV$p#CMLp!QG->Q_oZ{Vk>7^>%vx
zKZX>%6QFIoC<X8Nj=kGRLH#wQ?EdAG@1pQ8e)%|U{qlK|YiMl6-E80&cKoD7{P9c|
zPRj#=U2nD5d}`~0T0o#AI_G-aJ$J8<|5e$H`h2-r*M~c_pO04@dJtn35*$|C{Bh-t
z(wicus2=B;AHRJ3-ipNR1q$9R(A8a(g8G+~g4#nVs9z}s^|zFQ*W2m%{~A#KKq+|7
zckJB(%3o7TF)99jmfM;(L66t!)^p%~cAHYb`&sXi#9fq9hrgd)2JUC8JrAf<WK^G5
zET`_z@#!*Gg_2R7lBa-7#Or&}t)tU8;MVq+?7W|j<-|2YufMv`N?g<7cu_?`A*)7j
zg?D;Mp~>U=FsV(60{=<VC%A?cuf<%OIO<ZT3p<oTc>Re;=pT^s2TDQzOG-iSp%nD5
zl!E?SO2O;x^!$GeDR?J9+jdb3-t!%Mw~>PWYf1qZAL^lz!mFh(`aJrPNeB`$v8Qh#
zWGbSO7Y>tcc#c>g)aVqX?Olc@b?am}9taeZeYo!FDo!YAmG;iR<bH;}08N<QpieO~
zi%|ec<osa=UVo-EI{Kl->e11sW7Pms#O~`vkvl$d)Ktmu4ZoOmuCrz>)3E-rm%mFq
zPLI<luh=c&TPt_)*@V=KpKo4iSTpj&_(c5aSqk1O&{G%+3!(`8ivE|3g5JX@=wBHH
z{kM#Q*W2m${~AvIz$kdnckJE4$-iKf9}nG7`Lkc%{r=FcarZ;FRq&x3Yy^1d)_zxg
zYDw*MR{SCM>=0N){;@zxQd;SVtF&=%CaVmVBz!IA4$Uo}IdNa+FstKB&edy-J=2L%
zdTx{MANNnM-3eJeJV^A$_ztNygVBu)aOt7oC;J%e)A*hBb>a88w?v+%BA$B*1^t6i
zem-==fCRDQ{-sMAa-xdj(n>0q;E=5W{~5l*fW)y?@@JSi#@m!lk>}ceMN%7T2&1c>
z;((#5Sp|e?bDzsT#W0mSCTs38iqV%@Znmh(Ufb`)e8Zmr0|cF+DhD_BagstHJCQ@o
z*31M15+)9g5Wd}(fW^dZoPbap6byWABOI@>V-7y_pycFeX>JV!8ToML_CO_ggMnaz
zyupu?UFsn3?fAVFe-$@yG_bTW1E2Yu0kv#%#gAIIO~hYS0D^Ep?9{<z=PD`QZU+dy
zJMpo?9W(zWVPn9F3mE+~WQy4a<Mz{U%=WzeUqhyNy&Vt#V-peY1ZdkX!p3{PWAFAv
z#B9^j{}v+o<>@!upC;n}`1Cusbn6~)R=w%{fZFd*zngbG{dOG6S=^@s+yk0upPh7@
zt9!qs5Q?9OZRpA1GW6?D?(-ZrnmST#5esbZn7h0tTS)jk8$`VJPDH$0psTy?0Wmw=
zBp?9c+X5sQUIwNEJ|=HtzZGs)G_V4Yrz|ERDk{pYU~Xh@<6vXzsAV8+Vy-TuVq)fG
zX#keAG%#}jV&YpmV))294-AgvMZsVg7#6|<hr)QlZ$PK>^Ya7Q7YHN*1Hr;jyhu1d
z8jAs*Ao9ro)4|+G)Y=TlD1ic1i8>g8L4GuXpBH$-1_N%z@TkWNd0;>-7$^n`Ja7b}
z?gqAiJ!Y4I84H}LI+|ErfFSvKu}~yXHJC~PdIpTC2^@k2=f~~|gBSb?0~io5#{y3x
zfT_F{8pm$m!~Acs$af|7P*Y$2B)C!+-<lrTnhrU|^5oQUX=x_Y0HoMamDB&Q_!F?O
z)J433+@mL9+824!=V1thq~^l;^PHq@xm6!iPg*?k`Es)=mL|?+>fB1lsRJ)J=T_G1
z<)1%rn6I5(a+srl_+cY!gGHW3KX@89N&Ka!$c#zMTtVFB3*WboF031ZEaDCp_uIJc
z+jB%yjbX^XwU_sgre5G{a;B^By~1#=CNZ$K(a@cv;*$C0g42VgQxDVpzTP&hEh)~s
z?|&+a-m<}G_F-m8b1p;ViGTVxd?@;=B`A0w;Zh-rA;i=;M!Uw1bo2BCYg?7`bp9ea
zL7OgAHSnc;KSsuJUy+hU!u_q-qnArLZ%g+Dzvhg{4VDgAa4{_CzA=wG?KQ_DaaCz3
z@MSIu?22Q8eiF{*CR7yOW%+^AZloi%>9HsyooVR#X0EYd<}Rw#SLU|amCsf0sS*8i
z_Qt^2<b5?x^@F7{X}*w8Y#|ZQPji%qGB^v*yEtS!&QX6`%IS;YW2DEpRm6J|3qK*v
zerCd;*+a1=pv%Rh_t5@CThKQGYgsddnUZV=*9+hQkK$cXiU#V7_Xir#aSLxHj1Xg-
zDl$met*cS(N`@8h<vlOqGDwx2rk@da(iy&ZuW;6;E+9>mmPx<iAKNO-Cs7tjTLBO3
z^mTEe<nD7ws|fp?&~sLi?<^i%&Urb@d{SQDDe&Pj%ChQOIqfc#i-YtjtZ=(?2$wlg
zk_~ZHTkol<9wz>!p2i97?<o;Ux0LThoxhW)>2B!eVR`YoW1jxmB)yBr1l^t(wb!^J
zrfcSfYItqz*-KKEUW-vr(TqQZj7c6`vAdn3ohHRQy^k$*^io}!F-`uA!N&c#dMSaH
z@1vjZbUu=j=kqwyGqA5!6T)<l>d^bcy@U+>6e8i1+<~W?DX1>>H45WwCazU>SuWHy
zosJ35jdsgmX?$z6RBUUly##OYaVmcK(Dd=fDl}NIb}Fmkw$PU|9xVBHv?MM?EajBR
zP0>0Utib3}=uUiCc36q7NO%9fWV0@<c`#n3IAT?MJ(RDvVR^!TXnmSN_2|dGqLi^q
z#91%PL|;%;=6QA0IsKE#G(XNM^}3ham*#q-KSii`B{nSGiQ&C$jt+%QPys0)ZHXp*
zL)_c!M-@>2bZJ~TQ3S=&>o3_N)S_fcq<KXxh~iZgvvikzcpLQ>79_(e<$Z4Q(Hu_J
z{xpd^$@+=uB*a^%Y&kIR<eNpfv&llQ@+HkDdK4t?q`gQvNd?zWCbpO5WKSnvsT3OC
zu>aZ^!YQaQ@G{zIqwy1E3ga5vr0*1b)47|zA<OB)eAG-ZIZdZ#6_k$m8uK@?02DOZ
z)127NRN3jvN%K2RC+VqLw6Ac;4pt1NOM72(PFOefA%GV7N(yGY3krFj8$ZeBG+XLN
z!<11zTrO$588x(6@LFJc;nn@Zyk_CcdN-Coa#5?E^wO`ipjgrtBxEb?<@F#D{eF!n
zkchFWkijzaf=7n5lr<TvsPU=A9_g_c?i5uD<fyYmaIu{3eyi5JR|6<2(X8nn<s<#^
zIWf$S%gHo*pBFe4F~7doM=#SFUGm(VMmxo(vQ!UtM4%s4L4ceguJPC1Po9%J-rdyv
z&%1EgxS}yl7R@`(qy0xL`mV+v$Z9@x-?{wxaLLdbqh~=Xg!A*M1F~lO;%@XJ7`zFk
zb-t=z?S>Z$+`F&6*cJzK<*Lz;eP9_G@+<^8hMPNi;6sE$JK5WuFSBW~%9iUTbcwOP
z4)aIGn-t{O6}7Nzohw&!;tWw-NQ@VSUW1lwq1dU!8v-tG-?2VUL^(9K-p@MSQB@P(
zcsD7Yhd$|nFJ-(b4)^w>?7D5OQ@kxhfjm85%<vmS+p@k)RM<Y<@3WP1bis<;?glod
zX|G+|+RK`T%hWYnyaZG|rSn~{xE+Vm2+&df!=p>_=*<Vi7AZ-^BR4~gl>^OXLk<}~
z%6)1(I#+OKK4m|bsc97|bZB&Z758>kC791`-8d?M@ohMR3{I-!#)D3tuh*U!B1zK*
z8izSure(!PWIAXC)or_`6-cQox*2Wsbw`|wNBRRGhtzy;FHv|FgeFI$5zFKW{jFp!
zno1tL&OQVtidUvSMJ7ewx=2jZLd{>zwK@5`p0wh-z}Hl<4l?n2n-W)kZGX&Kw^9Y}
zl=CGkx3S>Nkw&)ecZr0r>CcvzoLqS_$&hA9a5+j+x41M^(1FeIB_&CYl4(nopWONJ
z&)!)JtDzr5-RXKe>9ZH8A6tz)aaurzziKlgvI;A_iIMe_Yel{`3{XOaBCGH1`%1^<
z9h#NDa;O}$IXTZv|88o2o{sk8ypnC!J#?u(a!rGLOd;Uz`mq;oRb&T<zJ-tu=fBwR
zsn~eJy^O3ctw=|%NI8vJw(wEcDdzwp(!q0i7Uw<=BG2<WYWI=(@%Qk0XJEcNhJU=(
zPzV2zlrk{j`uY76o#ebbeFnpKVmTWRpRkOk<<pi>r9Goh&Z7ybp%boVZF}%JGF8i^
z(B@6qD1Yn|R~qx$RK_zEuU#{mRR19%=Wb-^RcyjwJeqoAdTZUg_@*sQn?jF$2^?hQ
z(ICl9B`s*PPyF<9@!=z<r#^FcN#mq7LQi`3J$n+kKIEV_$x=Vy*3jIe<@UVJfpYM~
ziTy-J6z1gFJihiLZZh7g80<&o$|sIW`maL|oG^K1<N0MSE}hCO+0Qk*{8fdsNzv7P
zo$mu<Y0gOD;_nGa!CLiiQZdRu^5o5?_j+%PIrl-g$X}2U(T`@iZRNvNvA;swtVHXs
zK4uoOF&ltP8x{+!c&ub))t;}uR#N^&`D%j6KQ<*f7i+W!HBG2rcR+>&15QOchCy9j
zBvihCT^`jtIMBueWB16Pm6NKv>$IQ2Q%EITmdo9i9;YO&^dfP!Oz|~NxN5^!tE!41
zv!tQE>(%T;adv&jm9KUtw#Oc@uV3uW551ZZFH{CUeV``;bH<J{Kz*P)^FhG7RP)eS
z_Vg)5S<XyvrVFt;`kCV{7x=W*>PoF!QYD>i8V)+jSDd3x$)ob0Sz45IVu`G+kB_>L
zoO0sc!1u9^{zNUOp3KUg!V-sZI7gOLS(Ac-r0#LkY2|R*PO2kUunVv3UYEnCU<=~8
z`%<3Vb)_978!nbF>^JtbIPBT%ynfE^)>8MG8aajO8vWJ>u6a`Q=-XZ=t;DCqJuI)k
zzZnG^3~m(Qjgc2gc1u--y-v!kOcH*da#}n5CSSni+_Bb+5swNiS$vu(d-zoq>MONw
zb=EI1d%X0T2+H8kgSe@<>m}WOt6G#Tc)zydo6HM8zY;z&CWCq6fJdJ=F3_rb5)nHU
zI~*0iKU#yDzTfPm!zwjD>jxtWbdD~#PFQuwgC3H^WKqRuDQ@5G$8N#T?$;m?LF$@P
zaB1_&>E$OCJXEX*?m6!VJZ=}iT&mb7@#r?<+!KapCleCpP`d8wS?<#+7n-}Ms`>1M
zXUw=IYFhM@F;NnfhdR|52f{T{ZlW$bYD*TWWRTztVQCdl_w~1EyjN1e=G(>yJveZ%
znE6tHR>q04?3}~tT3Mr%UmH6En~HGD<59{Ms_)EB_-8M3Ka(ZzxT5*GV=iAef_hk!
zvql8#Ua5P<d%v#vm|CT&wvmF<;<4Lz?P-%=%+FfM+}+6UE^|s1LGbcCcQv8wm_KVt
zB2>g!*96rmgEOvQr}fmmYk6~7q|Yocj`doiCcV$(I9VE>T<C{x8b^O*N^<0whIDXh
zk*QSAauQqb^D%`%24<h?xBNHRr3>4QG-~50-=q_}g)0b`*{!)~S&&nwh%-<4R~t}&
zY=iP+qG#VEdwu%u^zaSIllN>Nbm^bVPG9(d^DfKdI78odp*z0Yi~f4``lZ56$m6=t
z_ck)Dn!k?RHN7guWL%n6GZ4F)=gKQ<mTh%PmLt|Lvg`8HdjgM11rHCDv-{MA=jA>7
zPE%77u^lA-r^MF<>;7nhB%y@$Z7VY7Cim2Wc<oCjS#mkpAfcT+p}`}Uk%2dL9@SQd
z{FKLpgNTY*EeCQx4T-oVS${P4XVO|qxK%s#$h_mDXc9l-6))*>^0$s_*>WMFj%3_^
zzBx8eye9Vx9%AoWT7EX0;^A{ixU!f$O=L8|Je7;4DzU+xa9!OEfeK|;@mz0mzZvAC
zI&+mLKcP5awu++-N$!!3b|&K24Qi4O{tCB~tV<_U|Il$cTDs!AeQvS&(-RHN&8*#e
zqHGsRIQpk%N<-(b(|H}$SO|(T_@<cM@_xFx&aZcWD=q58a=WP94VX6h`lqlpew&9S
z%>kvI8XCeR4eKNtHr>GyCEm=LRty8v?#u`KR|zDFS!`*J6^0I3x|u{73LFnXnVs%K
zhY3D{U-I;v@o@4xk@xY;vNp9(S$*6+58kz0VprIQIrl`}+Df7Kqq&A&dWflFZq_u>
zuH>oaH<=cLWKVjIKha$N+&c{2ko}BWera2ixZc<=DVwC)?C$gRaT;^!wEISvDLR&^
zVJZrOD~`8O3ai>@mhO4!v0!kXW{6n+!XwpiiS-GyFczZ-m$>sRvlrjd&!tEBhA)bn
z@l`_+6cgVc5^(9&tbO$v^2>KbR_SnWoS?5@z6%v<2$}3w756>Y?DvAWzrSF@@~V$#
zWV18k6>JUr02$7i5mQ_wK$gosC1cUXUYv57&^Wy)Fuihgoxg5I96c`PI7F6L?tbXw
ztpstZu@KceJ!aA9n0!m4<`2w$9zs^(wgTy{MrIjmuRhr}g|kWAc=hmf24((3I*aSr
zFxT;}b7r>)Rs5pk8QG%WRS;zF3mn0H>-+q@*_z$+(ThN0@v8i&$1{}0Lbur+h(CW1
z&7Vm_AAY^>lNR^)=ES!f3}0%`Rai~;9}#>UMAodA*fl*-lAiivZJ%xsO_yXZf!i{r
z;B#IdQQ7gU5;(ap6Iw}X3A$9nvE)bm)zux$b*)}SvZ|MBGFrSPLf5~dKkf0FE1!=m
z*KOIHB8;<upl4bAURsMVw0A)`hs(V?`oT<2>cFuWsvB*@>BrQM_DI7C8Pblhtc_lt
zU@vX5Tq@2AUnZXl<UF9RvhiZr=+-;=w&$`pIukC|73N-<>E{`|!Jd!di9G%FTUL7B
zpguj%QfyN*{o&&}QI2Qhgp3A5LKYMFM<ZFqs4j5yDzvvta~HNJG%6LnUe1To&i0Tq
z3_UuGiJM3t@@sjd$uea}D{t&y40U;P|K=pmd#%J<f+!mY9Xsp|qj(oo$`$K?f-HtR
z55MXwy1AK|En*2!Nu^P<8qu*1+++{W8iW}VF0^=rUUqdwKJ0t5SUS}C*^yf$mGVrD
zmsa-Vc<%|_h+}+pO2Zl6U9{%oN2iJ6$dyVsBM7q$KD7!OjAB@LPrHjTe5(m&vYxEJ
zPqWz7Hg35u<nY4T1<@M9>ZwTaYAxS8?q6sWXjiq{<T55X-@Lo)&|LLRK{R!wrD=u#
z$m_U<19{A(EWO+X1&=9!(El16m;DGGF(mq<b@&D@C@DQa@~)-tQ{9zr%m5L|_ydmv
ztw}6i?Vd@BX^uhNK22|P?mjlBlD)$~H6~$o{%X}!1<S%hnhn|1ke&FED)QBNc8w!s
z{(Y@fqI!NCpWk=oci&$TE~<;-xn5q~RN8bq|8{KAGicuQi<XoJhIJA>b2_Er-zqLy
z$%K5cx1J6u7xy-O_RmYZZf}}r8k*$1mw2hQ$w~dk{23gywcppa?=R?TJwyFw+`@RE
zo{meI^~ifCxZROFoii)}6wfYet)Ay$3TcIDiJCYEXg=_mEobaAocZ$V>{<En@VxZ&
z5XLibsT*za@LHr<Tek8jr7Bl5yO^VVS6)SLYU<_jikC0n1uUsaKZ{cp7TS+%YN4HA
zXK!qxp>1v!^BN92&XNZ2Ud%Kx^$RCB27|Mf+Fo(PRT-?czkWUR;8Rf0rw2pt0-`6P
zqu1J&kkU+-#9Jnsn_60wIzmH<<jzbv7teY_)ZWM1u+5b5zmGH`#T4&D>BtcDnrLRO
z1_rJY5ghn>gMPA8=ia^zAD8R~=4&b5m$TPn)R^es67RD=_mJG_9*vO(<02*Z;ZVjB
zg*2zQ(1QuMw8%T{&Bx-s-z45xHmq_QEoh?RC^EbM3^HM_B-b~WR2n)u^(L?NTEI%%
zxRXuvlm5n<!dQ>5uglzf930#WZqOYxnR1*f%p;V3SJv^E>5gs7JwNWWcE^~m`p9lC
z%B&3d^}CKi>=J0ZjJw?V1M1ob$I7V4pY!g!eEICvyn++@!wH?Y6{Cd$C(_s2>|`iz
zztF_+Y9S>a<(6xZzkDgQ=IG8~SwnXbGT9hPVO;l87iu-;SA7W4L(q+I#myNWQch?J
z4IgA)wWhghIVSpG1Tr}&#?P!yzgCDiZ^2U;GFUG^G&j(&fAiYC?v58ldg6}0Lek1y
zf|-3?weL!9zRiI#s9AqTWEPRXd!8|MDs(?P($d?|c&xy)?BfjuhYCTy2SS{IlcfrQ
zH<OSzR-YB{=-*qICHm0)U8LfhI@c-VBR*OYOjT}!=qejvQMYWKx=Btx=%|aWE1a14
zJcRSG9H*Y=5`zt%kW-0ZkMV$hYYOlZ;k~y2|7dt-`5AqN9%jg}fv=Y<jNW8K@`eoS
zdlXK4)N}5AnJ?dLznsQd?xg2aF87DuMOS1?vT?^Bal99JAqlHEj#$)kUPc~EW1S~-
zlh>>`rYGy#D^n_*zcxQS9CCj6GhZx|@Y=ICay<=1lG=G3FBER&roTeib5xRb^(u@w
zMr|Ovq5NNIo;p=8tjfs0L?DROgE#9}vu(pwGs-`_P$HLF=VVcrj>#GpozBtcBcE{@
zUQ~TZwxssGylv2OG~wN>(e32In~zT2J{CTxS4vc_S1*fU>ApRaz@x1s@8=}DCam<T
zk~sX@{0UpZ`q4p>hC=jQ<o8t3^7{i6jF#za84l;3b7T;BdbW}gEe%cCb6JJU3f?Qb
zSiXME`NO$yZ*(h=P)F0}B`F-4rP|QVZzK6xN@n@<0>XJFzKGgU(ftrAy-)L6-OV2d
z0?iVlZq#w;>0;pKtE*|3%oIq|2ajS3@;l^{*Ar4Iy~U#EBcAY``v*3j^f1HlMoPgj
z;`F!Cg0`X{yBzh__;U9vBkqOHodrggbhY1?ot_?<D%XhXNPWrO-8Hy)iG0jBWzdD2
z&S>;@;_@MVWtPv40#F;(TteS(bAHvym*}3oO`)15N(qQ}I555DDz;LMK&W(Q<j-7{
zY|d_ujxsd8oALpNG=2YpYdSnh@CB)08V^;z{N>Vu%Kf@L9fCMPZ9!99X-ZS*G`cg4
zNt~GTqI(@qvn0W=C0?fmjZ$(VJrQX!Tx>{G#wXP<YAvVQ9CFmI`AXt7KK2ibR*4*$
z4RMqdRW(9)-8gTZ>=<!*^>CGj@{^q`@-uaVoq^B{Tg6;W)A!>Ok2R|x+LF!&QAAg&
z(H|zVZJM)b{k~u?2rjN?u-1ylEHrz331=Xu%JlG*teJbs>D5_sZS&wlC5F1&cP5wH
zvBXS&kuWaors6c&=$GVnmlMg=n;9qg$X(8tCkhQ4zH&1QxZ~?LsgY2qXI1ah#iQ&d
zJpST!Y@K_)-H_CUfyYI?^6XEl^ClQP1HG0vtqu4#9x(kA{HRCSA^u1S*R$KbYLnuj
zqgJu1vEGkp8Ozk@#CW=WwVLi;B5R#F&~C;ZTlzG+=vL$r-h`L!0gw^b_+p!Of5aCr
z-M1SG(+=~Mz2%2_IIRTN*$WeN8q7_m@695X!$tFDpT0bOu&&g23Uk{gQJvA!wDvRk
zr<A^jWd3F$Z;}L0kJ5QP+W01{LvR62Wt&*iaA@lEcdaAJ)#-!Fua~)QO~f4BkPMBP
zI5BB_G~}-6dnW;@lZP`i8e@bhAFL64la0Q`!nkmcB$DK;-<1PAkD&*c4+s-KpE^vF
ze9E8Xcu?&lDL-uU;pY0Iey@UvA&o2>Bt8sd4P%HyZj&qdBu)KOV{utdiG9Px7&q=Q
z|Co2?_D59ik<@2JJHMz~;<zWP37Mg^4)JZYwDT293({azU*pQ+(zx3~s$z0~BXuqG
z)@$?79``FTXSM3i*Qu}jtQk$6naX(HqAPKm_O@I>Qo($!a^;yyU-(q23~E!bl_m1r
zro1EP6`>%)&3r$K;~J2;>pbDy#7p-lyo8UtL@g-83z}Cj(mwl%j~*~34v!^8Fja}*
z(&7i~>>eH?I)3;ZQ&rS~#|W~Ek{2EdGgeXcM&DwpN~pS3?`8H;{#&mL%ZF?_=b@M|
zN@l0G){jXS5X5te_DM5lwkJexAIfU_5EoWU_TFWxs%OM_$hT)~DLKr7a>a2?thw%B
z*h*~Qs(Q2M!KIO=soWU2Q1ne@_tvV-Yp^E=--<4Ng&Zfpy>Kl2+`<Ud{?$0+RpAuM
z`6<NMQ?oDZ#Ni%uE#Fqds-h&aJI{Xd5@LPRY1UXwNL%mxcHIN_Af+>C<@50cFQIcs
z-sblZ2vq?#%#J&t&DpKeBcme^Hnz!Ns_t2}vnWh3L`j`3Y6w;vb79D#K0%R8tw*g0
z5#VE_BD>|Q$<d^bqY}rl(n^;|Gvz#t>_}(2M{h<PrRKU$yIOnL{Ue3N<Du})jh<^M
zB2l59sC3$q@@M&69v4R6SAW&zyxAUUw$d?~W<A1BB-J|_KmEz-NJWp`#Vil&dPRuL
z6VKdF#!QO?2@|Rn=bktBrG(jVZ5BRXl8SyVvId{Lbs&lmE}E?FJ9RmtdS#@Y<blz1
zq-&=$@$mIHo==I`HS*q?@}rlgglE+`RMi7su*ms;&QX3-eB#^J!yB2j?wgvKDEnHf
zjMtbN!jv2Rytk^KRnv$rGI6Ut%c|_OfDB`EdyBpAe6JVFjFPL=PS)YquRA!y>Cm1W
zh!v)}t&@MoLQ(YF?AZ{(<Jsh0n!z}h!c^(1JdW=OTmSHpq#K33r&oq^1aD1#*&mrK
zv|jQVHm2m{RWhW(Hl3T4JTmVRO81~rA#+98V$r+snRR2eshYAutEZ%Y>9f+#sZ~?v
z=bw`dvfb+IQ+w2+Rny)iAj(<hD?`*#Im%1D(nsUg*;+mie|?(6GjMa(&3!!LNTF$<
z(yc4%s-3(ftu9=sm&uli#txI%`Avu9&&QS3D$>Z(QDmhx($R#Q5|BC<d)~(^E7Fa%
zCMn9S(;QPOD=B#@o(zF&7v6TVGd4ABN{h{wEWiHpjQ5^UtHjfa$9;#Rtx_3Ml1dn7
zRmujR^YA-EXipbjV*6If71nya>LhdD$w9lyylI6+ULI`0b6Dx)Vq8p`E_>n7$)2v$
z7FPntEhiW0J<q<hz2i*Omi9Hw*>%cPOUe4n7p?510OidX&&sE>vWZW=SVp(=)vlDz
z4^=)(qJG)x3|D%mBH55M+*<5PCcFBcG(A%_R(F!wKDpC8qL>ovbxNo@i;G!b-KD@<
z_{k=>jZMDZfQ_$JiSTp;n}(eZb<HiG`eASHg0K6|KIAO!>1LVlqhnZIbJpfAHcOUw
zEuBf)#B^}iD%_vZOC4CS%4!RDX{+er?3~D|W0U)K*2CC$rfc(})GTF5D@(l2{gnZ!
zB;RKj-M_j%Gf^Bg`L@|YEV0}(A_Q^Zyk(VI=y_ace`!MQ;Nmg&=wn|`e4yOlc-by0
zj)i^9WyXpdHTvC>5u*_+K~*o)>|#~YIePz1Y;e1mhd5#-*D4;BGdX_c<wUsrWaooV
z&4msz>kb=d-1;{7;vajrkytY;Y>+Q~bX?pU=f%e<!B{2+j{UKD1^YAOVt1f-*T!#g
z83`~z3I>7Uz(#HZ2NV28W4`|}+zTkEx;g-1RvBwk8(`lt1V|q+GY9r>x<XEi0{K8D
zoDe=Ga67oUwHf5Jj4=?#HFtCcgsM)qww5MVU|1Il0rxci8gho<gVI1Y0?>e(jg*Xp
zf&s9-dPkp>PXpYqt%CwWvlzguKYif9R_@>BpiGw(*wGE_fdj9R0vo?{kU%CP_yGlQ
zAc(x9v|A3!cAtaaVc0Fb9o-$h-;cVaySI~L!Q_suATwkqsEh%2>jPb|z3UZpS;pEN
zNJRl&ieRK_;s`M{w>Gxl-pCFy1p3#S6xb7MY!2-7-a5noH?lIY#rN007!Y-F)xBc@
zF*Y#;Dgt_ZE98$KN1mO&=K*wnmcXYA0JZ=cuq|W2UE&Vl4taY6YhZ)E0Qj~5A&3AD
z_`_$={B|9_rDbUPYw>L@`~hDF45$MNMF3Ul2=JpYIN%hGhNFN3QW7o!9I(Ju{25df
zyoQ4zfdi0iAPyWb{3uEMH3@$38XN_dL}SFzzyT-%s^f4_un7zXe?UrNK|K_lUko^)
zQ8=(pj2IGBM@sU8Rim+RPyve*#{dTu0*Nnzf`OJ{C9t4nfJNZ7Ewh2jpkRAYC>-n$
z6b=oZp%5rg4=4iPI)Hj8NesRP3>3R1+;$!>L`iN}y?t$49rOsOWvfnp&<GS9wq-mD
zv>FQ)16x4B(4Y{7z_-qiMgW!JcY*E<0FXR{2hxw>H*&HE->`s38y*zkU<1icKLBZK
zV+-;R{<D{ez%-g2xc|&8*+n^^5P0*(UvQ12HGmzEM}F`CkMvF+h_L~X)&Ve=14IXy
zsz6=`kVT?qqi$^u!VQ9-bSghnuzt=#pwy4e;|`>Ja^}Vk5K<k|tyC!dCUX^1{5E3X
zuqDLr08}AGY@Olv?x>LBcV?)N;?wSd!`3x?#;nR#LZk{QW-HwUuxl%$42A&Gy|yPf
z@EQ>b96-q?SAkhCV+ACEiESUncXP*8dtjPDffrPeg7aeQ4}wMk7=oBWe#(G!9`Fxh
zy)A=bfSj4%WKbv!OsoB+9DqzPDf>4W8r-7#iwuTAf~n-c$N*h{TYr}!!0!329O4h<
zkl=oe-^%fWS#Q6}U}z+ek@l+$iUBvV{3hcE$KzKS3=QTW{wCuGlU09}!D0M+Y=L30
zd)k2gp$#}3wnrC^f&uw{zqRoPn}PS({NB${IFkPlx?n!+uk|6IdwoM7pnJv_j^*Ej
z0|f9=nmzR)!2RpLwSflri2o+TqW0<{_x3&F5BNi1!7RaF>qEl8OuXM@NbprIzse9m
z;@IypB(U#)j|>TJLj6@2i2?U!|0d%H)3|?=0n=)a3<LdxE_RPD3Jwy^ul1o&K=Rw~
zG8l9ZUQlo_srq-_z1Tou!EKYjmO}%tMEYHZLW4B)YdI8lFHX_Wz1Tn__TnB~bbqf8
zfX1G_LL>I#6ph}C4KzQv3Gr9^FtEMYKx6h|91XnvX|FE-9$W#_ZI8_u3`pd^wTpo5
z8D9)?FD9`t=$<hGUX8T3K7MeO_^k~L|6W~w{yo^hA^`^fO&1F+*L!4e@O=)ywt+=}
zsf@qLfOTSzjDL^6z^VYgZsiwU7#h4m{aps%jTc9I10bi;9{(aRRdY9B#YF;HoZwFa
l0Fd~kVjCMEEgG<NE926^(ZJpjznlR>4U}bLlT^Zy{vXccD%}78

literal 0
HcmV?d00001


From 3f51637bcfcee9ed92b37c1d65b8dd39c2afdf82 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 09:36:35 +0800
Subject: [PATCH 12/46] 'commit'

---
 dsLightRag/Test/T1_Login.py      |  5 -----
 dsLightRag/Test/T3_GetArticle.py | 28 +++++++++++++++-------------
 2 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/dsLightRag/Test/T1_Login.py b/dsLightRag/Test/T1_Login.py
index 8c4c57e0..e313e8ab 100644
--- a/dsLightRag/Test/T1_Login.py
+++ b/dsLightRag/Test/T1_Login.py
@@ -4,13 +4,8 @@
 # 微信爬爬猫---公众号文章抓取代码分析
 # https://blog.csdn.net/yajuanpi4899/article/details/121584268
 
-import datetime
 import json
 import logging
-import random
-import re
-
-import requests
 
 """
 # 查看selenium版本
diff --git a/dsLightRag/Test/T3_GetArticle.py b/dsLightRag/Test/T3_GetArticle.py
index 02e9cce8..6dae6c0b 100644
--- a/dsLightRag/Test/T3_GetArticle.py
+++ b/dsLightRag/Test/T3_GetArticle.py
@@ -1,15 +1,17 @@
-"""
-安装pdfkit库
-https://github.com/JazzCore/python-pdfkit/wiki/Installing-wkhtmltopdf
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service as ChromeService
+from selenium.webdriver.common.by import By
 
-我是在Windows上开发的，所以，下载的是：【注意要科学上网下载，否则太慢了~】
-https://release-assets.githubusercontent.com/github-production-release-asset/131323182/3200f380-aba8-11ea-8942-42fa5e27a312?sp=r&sv=2018-11-09&sr=b&spr=https&se=2025-07-15T02%3A10%3A32Z&rscd=attachment%3B+filename%3Dwkhtmltox-0.12.6-1.mxe-cross-win64.7z&rsct=application%2Foctet-stream&skoid=96c2d410-5711-43a1-aedd-ab1947aa7ab0&sktid=398a6654-997b-47e9-b12b-9515b896b4de&skt=2025-07-15T01%3A10%3A07Z&ske=2025-07-15T02%3A10%3A32Z&sks=b&skv=2018-11-09&sig=IYNB2Gi%2FZ9tZfPXmo7PbqjbxmcLULpP%2Bex2z6lp2DvE%3D&jwt=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJnaXRodWIuY29tIiwiYXVkIjoicmVsZWFzZS1hc3NldHMuZ2l0aHVidXNlcmNvbnRlbnQuY29tIiwia2V5Ijoia2V5MSIsImV4cCI6MTc1MjU0MjU3NSwibmJmIjoxNzUyNTQyMjc1LCJwYXRoIjoicmVsZWFzZWFzc2V0cHJvZHVjdGlvbi5ibG9iLmNvcmUud2luZG93cy5uZXQifQ.LyZXiO_mRK2qX99CTJtVwypU4DLsK-_Js0wspzsL0Y4&response-content-disposition=attachment%3B%20filename%3Dwkhtmltox-0.12.6-1.mxe-cross-win64.7z&response-content-type=application%2Foctet-stream
-解压到D:\wkhtmltox中，还要注意把路径加到环境变量中
+url = 'http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd'
 
-conda activate py310
-pip3 install pdfkit
-"""
-import pdfkit
-path_wk = r'D:\wkhtmltox\bin\wkhtmltopdf.exe' #wkhtmltopdf安装位置
-config = pdfkit.configuration(wkhtmltopdf = path_wk)
-pdfkit.from_url('http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd', 'out.pdf',configuration=config)
\ No newline at end of file
+options = Options()
+options.add_argument('-headless')  # 无头参数，调试时可以注释掉
+service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
+driver = webdriver.Chrome(service=service, options=options)
+driver.get(url)
+# 可以只要txt
+html_content = driver.find_element(By.CLASS_NAME, "rich_media").text
+# 第一行是标题，分离出来
+title = html_content.split('\n')[0]
+print(title)

From 78633b321cb5981b8a80ba0a94aed0022cf66f67 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 09:43:31 +0800
Subject: [PATCH 13/46] 'commit'

---
 dsLightRag/Test/T3_GetArticle.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/dsLightRag/Test/T3_GetArticle.py b/dsLightRag/Test/T3_GetArticle.py
index 6dae6c0b..baaecd48 100644
--- a/dsLightRag/Test/T3_GetArticle.py
+++ b/dsLightRag/Test/T3_GetArticle.py
@@ -15,3 +15,30 @@ html_content = driver.find_element(By.CLASS_NAME, "rich_media").text
 # 第一行是标题，分离出来
 title = html_content.split('\n')[0]
 print(title)
+
+# 按行遍历html_content，当发现空行时，删除空行前面的内容，只保留后面的内容
+lines = html_content.split('\n')
+content_after_empty_line = ""
+found_empty_line = False
+
+for line in lines:
+    if not found_empty_line and line.strip() == "":
+        # 找到第一个空行
+        found_empty_line = True
+        continue
+
+    if found_empty_line:
+        # 空行后的内容添加到结果中
+        content_after_empty_line += line + "\n"
+
+# 如果没有找到空行，保留原始内容
+if not found_empty_line:
+    content_after_empty_line = html_content
+
+for x in content_after_empty_line.split("\n"):
+    if x.strip() == "" :
+        continue
+    print(x)
+
+# 关闭浏览器
+driver.quit()

From b8b4b08b3fa73d3fcd134fbeb56a3e25f27bc6ae Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 10:04:49 +0800
Subject: [PATCH 14/46] 'commit'

---
 dsLightRag/Test/T3_GetArticle.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/dsLightRag/Test/T3_GetArticle.py b/dsLightRag/Test/T3_GetArticle.py
index baaecd48..2f9c9e27 100644
--- a/dsLightRag/Test/T3_GetArticle.py
+++ b/dsLightRag/Test/T3_GetArticle.py
@@ -35,10 +35,7 @@ for line in lines:
 if not found_empty_line:
     content_after_empty_line = html_content
 
-for x in content_after_empty_line.split("\n"):
-    if x.strip() == "" :
-        continue
-    print(x)
-
+content_after_empty_line = content_after_empty_line.replace("\n\n", "\n")
+print(content_after_empty_line)
 # 关闭浏览器
 driver.quit()

From fca710e0719b982411d4b9aed2dc75ea026c72d2 Mon Sep 17 00:00:00 2001
From: "Kalman.CHENG" <123204464@qq.com>
Date: Tue, 15 Jul 2025 10:15:43 +0800
Subject: [PATCH 15/46] =?UTF-8?q?=E6=95=99=E8=82=B2=E5=9E=82=E7=9B=B4?=
 =?UTF-8?q?=E9=A2=86=E5=9F=9F=E5=A4=A7=E6=A8=A1=E5=9E=8B=E5=B9=B3=E5=8F=B0?=
 =?UTF-8?q?=20modify=20by=20Kalman.CHENG=20=E2=98=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../api/controller/DmController.py            |  2 +-
 .../api/controller/DocumentController.py      |  2 +-
 .../api/controller/LoginController.py         | 28 ++++++++++
 .../api/controller/QuestionController.py      |  2 +-
 .../api/controller/TestController.py          |  2 +-
 .../api/controller/ThemeController.py         |  2 +-
 .../api/controller/UserController.py          | 32 +++++++++++
 dsAiTeachingModel/main.py                     | 14 +++--
 dsAiTeachingModel/routes/__init__.py          |  3 +-
 dsAiTeachingModel/tasks/BackgroundTasks.py    | 48 +++++++++++++++--
 dsAiTeachingModel/utils/Database.py           | 14 ++++-
 dsAiTeachingModel/utils/DocxUtil.py           | 54 ++++++++++++++++++-
 dsAiTeachingModel/utils/LightRagUtil.py       | 47 ++++++++++++++--
 13 files changed, 228 insertions(+), 22 deletions(-)
 create mode 100644 dsAiTeachingModel/api/controller/UserController.py

diff --git a/dsAiTeachingModel/api/controller/DmController.py b/dsAiTeachingModel/api/controller/DmController.py
index 7dbd0e51..2f719317 100644
--- a/dsAiTeachingModel/api/controller/DmController.py
+++ b/dsAiTeachingModel/api/controller/DmController.py
@@ -1,4 +1,4 @@
-# routes/LoginController.py
+# routes/DmController.py
 
 from fastapi import APIRouter, Depends
 
diff --git a/dsAiTeachingModel/api/controller/DocumentController.py b/dsAiTeachingModel/api/controller/DocumentController.py
index d88da710..bd0e2998 100644
--- a/dsAiTeachingModel/api/controller/DocumentController.py
+++ b/dsAiTeachingModel/api/controller/DocumentController.py
@@ -1,4 +1,4 @@
-# routes/LoginController.py
+# routes/DocumentController.py
 import os
 
 from fastapi import APIRouter, Request, Response, Depends, UploadFile, File
diff --git a/dsAiTeachingModel/api/controller/LoginController.py b/dsAiTeachingModel/api/controller/LoginController.py
index 307fd3b6..b3368d35 100644
--- a/dsAiTeachingModel/api/controller/LoginController.py
+++ b/dsAiTeachingModel/api/controller/LoginController.py
@@ -129,3 +129,31 @@ async def login(request: Request, response: Response):
     else:
         return {"success": False, "message": "用户名或密码错误"}
 
+
+# 【Base-Login-3】通过手机号获取Person的ID
+@router.get("/getPersonIdByTelephone")
+async def get_person_id_by_telephone(request: Request):
+    telephone = await get_request_str_param(request, "telephone", True, True)
+    if not telephone:
+        return {"success": False, "message": "手机号不能为空"}
+    select_user_sql: str = "SELECT person_id FROM t_sys_loginperson WHERE telephone = '" + telephone + "' and b_use = 1 "
+    userlist = await find_by_sql(select_user_sql,())
+    user = userlist[0] if userlist else None
+    if user:
+        return {"success": True, "message": "查询成功", "data": {"person_id": user['person_id']}}
+    else:
+        return {"success": False, "message": "未查询到相关信息"}
+
+
+
+# 【Base-Login-4】忘记密码重设，不登录的状态
+@router.post("/resetPassword")
+async def reset_password(request: Request):
+    person_id = await get_request_str_param(request, "person_id", True, True)
+    password = await get_request_str_param(request, "password", True, True)
+    if not person_id or not password:
+        return {"success": False, "message": "用户ID和新密码不能为空"}
+    password_md5 = md5_encrypt(password)
+    update_user_sql: str = "UPDATE t_sys_loginperson SET original_pwd = '" + password + "', pwdmd5 = '" + password_md5 + "' WHERE person_id = '" + person_id + "'"
+    await execute_sql(update_user_sql)
+    return {"success": True, "message": "密码修改成功"}
\ No newline at end of file
diff --git a/dsAiTeachingModel/api/controller/QuestionController.py b/dsAiTeachingModel/api/controller/QuestionController.py
index 89456bc5..48b7ed39 100644
--- a/dsAiTeachingModel/api/controller/QuestionController.py
+++ b/dsAiTeachingModel/api/controller/QuestionController.py
@@ -1,4 +1,4 @@
-# routes/LoginController.py
+# routes/QuestionController.py
 
 from fastapi import APIRouter, Request, Response, Depends
 from auth.dependencies import *
diff --git a/dsAiTeachingModel/api/controller/TestController.py b/dsAiTeachingModel/api/controller/TestController.py
index 4a572ff3..5c6a8ed5 100644
--- a/dsAiTeachingModel/api/controller/TestController.py
+++ b/dsAiTeachingModel/api/controller/TestController.py
@@ -1,4 +1,4 @@
-# routes/LoginController.py
+# routes/TestController.py
 
 from fastapi import APIRouter, Request
 
diff --git a/dsAiTeachingModel/api/controller/ThemeController.py b/dsAiTeachingModel/api/controller/ThemeController.py
index 297817d1..3bd9fcd5 100644
--- a/dsAiTeachingModel/api/controller/ThemeController.py
+++ b/dsAiTeachingModel/api/controller/ThemeController.py
@@ -1,4 +1,4 @@
-# routes/LoginController.py
+# routes/ThemeController.py
 
 from fastapi import APIRouter, Depends
 from utils.ParseRequest import *
diff --git a/dsAiTeachingModel/api/controller/UserController.py b/dsAiTeachingModel/api/controller/UserController.py
new file mode 100644
index 00000000..e23d8f5f
--- /dev/null
+++ b/dsAiTeachingModel/api/controller/UserController.py
@@ -0,0 +1,32 @@
+# routes/UserController.py
+import re
+
+from fastapi import APIRouter, Request, Response, Depends
+from auth.dependencies import *
+from utils.Database import *
+from utils.ParseRequest import *
+
+# 创建一个路由实例,需要依赖get_current_user,登录后才能访问
+router = APIRouter(dependencies=[Depends(get_current_user)])
+
+# 【Base-User-1】维护用户手机号
+@router.post("/modifyTelephone")
+async def modify_telephone(request: Request):
+    person_id = await get_request_str_param(request, "person_id", True, True)
+    telephone = await get_request_str_param(request, "telephone", True, True)
+    # 校验手机号码格式
+    if not re.match(r"^1[3-9]\d{9}$", telephone):
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="手机号码格式错误")
+    # 校验手机号码是否已被注册
+    select_telephone_sql: str = "select * from t_sys_loginperson where b_use = 1 and telephone = '" + telephone + "' and person_id <> '" + person_id + "'"
+    userlist = await find_by_sql(select_telephone_sql, ())
+    if len(userlist) > 0:
+        return {"success": False, "message": "手机号码已被注册"}
+    else:
+        update_telephone_sql: str = "update t_sys_loginperson set telephone = '" + telephone + "' where person_id = '" + person_id + "'"
+        await execute_sql(update_telephone_sql)
+        return {"success": True, "message": "修改成功"}
+
+
+# 【Base-User-2】维护用户密码
+# @router.post("/modifyPassword")
diff --git a/dsAiTeachingModel/main.py b/dsAiTeachingModel/main.py
index 18a578f1..8f99a901 100644
--- a/dsAiTeachingModel/main.py
+++ b/dsAiTeachingModel/main.py
@@ -1,6 +1,7 @@
 import threading
-import logging
+
 import uvicorn
+import asyncio
 
 from fastapi.middleware.cors import CORSMiddleware
 from starlette.staticfiles import StaticFiles
@@ -18,11 +19,12 @@ logging.basicConfig(
 )
 
 async def lifespan(app: FastAPI):
-    # 启动线程
-    thread = threading.Thread(target=train_document_task, daemon=True)
-    thread.start()
     # 创建数据库连接池
     await init_database()
+
+    # 启动异步任务
+    asyncio.create_task(train_document_task())
+
     yield
     await shutdown_database()
 
@@ -41,8 +43,10 @@ app.add_middleware(
 app.mount("/static", StaticFiles(directory="Static"), name="static")
 
 # 注册路由
-# 登录相关
+# 登录相关(不用登录)
 app.include_router(login_router, prefix="/api/login", tags=["login"])
+# 用户相关
+app.include_router(user_router, prefix="/api/user", tags=["user"])
 # 主题相关
 app.include_router(theme_router, prefix="/api/theme", tags=["theme"])
 # 文档相关
diff --git a/dsAiTeachingModel/routes/__init__.py b/dsAiTeachingModel/routes/__init__.py
index 5bde8674..4fa720b9 100644
--- a/dsAiTeachingModel/routes/__init__.py
+++ b/dsAiTeachingModel/routes/__init__.py
@@ -5,6 +5,7 @@ from api.controller.ThemeController import router as theme_router
 from api.controller.QuestionController import router as question_router
 from api.controller.TestController import router as test_router
 from api.controller.DmController import router as dm_router
+from api.controller.UserController import router as user_router
 
 # 导出所有路由
-__all__ = ["login_router", "document_router", "theme_router", "question_router", "dm_router", "test_router"]
+__all__ = ["login_router", "document_router", "theme_router", "question_router", "dm_router", "test_router", "user_router"]
diff --git a/dsAiTeachingModel/tasks/BackgroundTasks.py b/dsAiTeachingModel/tasks/BackgroundTasks.py
index e90bdc52..d43dc190 100644
--- a/dsAiTeachingModel/tasks/BackgroundTasks.py
+++ b/dsAiTeachingModel/tasks/BackgroundTasks.py
@@ -1,12 +1,52 @@
+import asyncio
 import logging
 import time
 
+from utils.Database import *
+from utils.DocxUtil import get_docx_content_by_pandoc
+from utils.LightRagUtil import initialize_pg_rag
+
+# 使用PG库后，这个是没有用的,但目前的项目代码要求必传，就写一个吧。
+WORKING_DIR = f"./output"
+
 # 后台任务，监控是否有新的未训练的文档进行训练
-def train_document_task():
+async def train_document_task():
     print("线程5秒后开始运行【监控是否有新的未训练的文档进行训练】")
-    time.sleep(5)  # 线程5秒后开始运行
+    await asyncio.sleep(5)  # 使用 asyncio.sleep 而不是 time.sleep
     # 这里放置你的线程逻辑
     while True:
         # 这里可以放置你的线程要执行的代码
-        logging.info("线程正在运行")
-        time.sleep(1000)  # 每隔10秒执行一次
+        logging.info("开始查询是否有未训练的文档")
+        no_train_document_sql: str = " SELECT * FROM t_ai_teaching_model_document WHERE is_deleted = 0 and train_flag = 0 ORDER BY create_time DESC"
+        no_train_document_result = await find_by_sql(no_train_document_sql, ())
+        if not no_train_document_result:
+            logging.info("没有未训练的文档")
+        else:
+            logging.info("存在未训练的文档" + str(len(no_train_document_result))+"个")
+            # document = no_train_document_result[0]
+            # print("开始训练文档：" + document["document_name"])
+            # theme = await find_by_id("t_ai_teaching_model_theme", "id", document["theme_id"])
+            # # 训练开始前，更新训练状态
+            # update_sql: str = " UPDATE t_ai_teaching_model_document SET train_flag = 1 WHERE id = " + str(document["id"])
+            # execute_sql(update_sql)
+            # document_name = document["document_name"] + "." + document["document_suffix"]
+            # logging.info("开始训练文档：" + document_name)
+            # workspace = theme["short_name"]
+            # docx_name = document_name
+            # docx_path = document["document_path"]
+            # logging.info(f"开始处理文档：{docx_name}, 还有%s个文档需要处理！", len(no_train_document_result) - 1)
+            # # 训练代码开始
+            # try:
+            #     rag = await initialize_pg_rag(WORKING_DIR=WORKING_DIR, workspace=workspace)
+            #     # 获取docx文件的内容
+            #     content = get_docx_content_by_pandoc(docx_path)
+            #     await rag.insert(input=content, file_paths=[docx_name])
+            # finally:
+            #     if rag:
+            #         await rag.finalize_storages()
+            # # 训练结束，更新训练状态
+            # update_sql: str = " UPDATE t_ai_teaching_model_document SET train_flag = 2 WHERE id = " + str(document["id"])
+            # execute_sql(update_sql)
+
+            # 添加适当的等待时间，避免频繁查询
+            await asyncio.sleep(60)  # 每分钟查询一次
diff --git a/dsAiTeachingModel/utils/Database.py b/dsAiTeachingModel/utils/Database.py
index 85580029..4ac15243 100644
--- a/dsAiTeachingModel/utils/Database.py
+++ b/dsAiTeachingModel/utils/Database.py
@@ -204,4 +204,16 @@ async def delete_by_id(table_name, property_name, property_value):
             raise Exception(f"为表[{table_name}]删除数据失败: {e}")
     else:
         logging.error("参数不全")
-        return False
\ No newline at end of file
+        return False
+
+
+# 执行一个SQL语句
+async def execute_sql(sql):
+    logging.debug(sql)
+    try:
+        async with pool.acquire() as conn:
+            await conn.fetch(sql)
+    except Exception as e:
+        logging.error(f"数据库查询错误: {e}")
+        logging.error(f"执行的SQL语句: {sql}")
+        raise Exception(f"执行SQL失败: {e}")
\ No newline at end of file
diff --git a/dsAiTeachingModel/utils/DocxUtil.py b/dsAiTeachingModel/utils/DocxUtil.py
index 82e26d2c..6c8051fc 100644
--- a/dsAiTeachingModel/utils/DocxUtil.py
+++ b/dsAiTeachingModel/utils/DocxUtil.py
@@ -1,8 +1,56 @@
+import logging
 import os
 import subprocess
 import uuid
 
+from PIL import Image
+import os
+
+# 在程序开始时添加以下配置
+logging.basicConfig(
+    level=logging.INFO,  # 设置日志级别为INFO
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+
+# 或者如果你想更详细地控制日志输出
+logger = logging.getLogger('DocxUtil')
+logger.setLevel(logging.INFO)
+handler = logging.StreamHandler()
+handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+logger.addHandler(handler)
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
 
+def resize_images_in_directory(directory_path, max_width=640, max_height=480):
+    """
+    遍历目录下所有图片并缩放到指定尺寸
+    :param directory_path: 图片目录路径
+    :param max_width: 最大宽度
+    :param max_height: 最大高度
+    """
+    # 支持的图片格式
+    valid_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')
+
+    for root, _, files in os.walk(directory_path):
+        for filename in files:
+            if filename.lower().endswith(valid_extensions):
+                file_path = os.path.join(root, filename)
+                try:
+                    with Image.open(file_path) as img:
+                        # 计算缩放比例
+                        width, height = img.size
+                        ratio = min(max_width / width, max_height / height)
+                        # 如果图片已经小于目标尺寸，则跳过
+                        if ratio >= 1:
+                            continue
+                        # 计算新尺寸并缩放
+                        new_size = (int(width * ratio), int(height * ratio))
+                        resized_img = img.resize(new_size, Image.Resampling.LANCZOS)
+
+                        # 保存图片（覆盖原文件）
+                        resized_img.save(file_path)
+                        logger.info(f"已缩放: {file_path} -> {new_size}")
+                except Exception as e:
+                    logger.error(f"处理 {file_path} 时出错: {str(e)}")
 def get_docx_content_by_pandoc(docx_file):
     # 最后拼接的内容
     content = ""
@@ -15,6 +63,9 @@ def get_docx_content_by_pandoc(docx_file):
     os.mkdir("./static/Images/" + file_name)
     subprocess.run(['pandoc', docx_file, '-f', 'docx', '-t', 'markdown', '-o', temp_markdown,
                     '--extract-media=./static/Images/' + file_name])
+    # 遍历目录 './static/Images/'+file_name 下所有的图片，缩小于640*480的尺寸上
+
+    resize_images_in_directory('./static/Images/' + file_name+'/media')
     # 读取然后修改内容，输出到新的文件
     img_idx = 0  # 图片索引
     with open(temp_markdown, 'r', encoding='utf-8') as f:
@@ -23,8 +74,9 @@ def get_docx_content_by_pandoc(docx_file):
             if not line:
                 continue
             # 跳过图片高度描述行
-            if line.startswith('height=') and line.endswith('in"}'):
+            if line.startswith('height=') and (line.endswith('in"}') or line.endswith('in"')):
                 continue
+            # height="1.91044072615923in"
             # 使用find()方法安全地检查图片模式
             is_img = line.find("![](") >= 0 and (
                     line.find(".png") > 0 or
diff --git a/dsAiTeachingModel/utils/LightRagUtil.py b/dsAiTeachingModel/utils/LightRagUtil.py
index 528f5963..e791c4a8 100644
--- a/dsAiTeachingModel/utils/LightRagUtil.py
+++ b/dsAiTeachingModel/utils/LightRagUtil.py
@@ -1,9 +1,7 @@
 import logging
 import logging.config
 import os
-
 import numpy as np
-
 from lightrag import LightRAG
 from lightrag.kg.shared_storage import initialize_pipeline_status
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
@@ -25,7 +23,7 @@ def configure_logging():
 
     log_dir = os.getenv("LOG_DIR", os.getcwd())
     log_file_path = os.path.abspath(
-        os.path.join(log_dir, "./logs/lightrag.log")
+        os.path.join(log_dir, "./Logs/lightrag.log")
     )
 
     print(f"\nLightRAG log file: {log_file_path}\n")
@@ -97,10 +95,13 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
     )
 
 
-async def initialize_rag(working_dir):
+async def initialize_rag(working_dir, graph_storage=None):
+    if graph_storage is None:
+        graph_storage = 'NetworkXStorage'
     rag = LightRAG(
         working_dir=working_dir,
         llm_model_func=llm_model_func,
+        graph_storage=graph_storage,
         embedding_func=EmbeddingFunc(
             embedding_dim=EMBED_DIM,
             max_token_size=EMBED_MAX_TOKEN_SIZE,
@@ -139,4 +140,40 @@ def create_embedding_func():
             api_key=EMBED_API_KEY,
             base_url=EMBED_BASE_URL,
         ),
-    )
\ No newline at end of file
+    )
+
+
+# AGE
+os.environ["AGE_GRAPH_NAME"] = AGE_GRAPH_NAME
+os.environ["POSTGRES_HOST"] = POSTGRES_HOST
+os.environ["POSTGRES_PORT"] = str(POSTGRES_PORT)
+os.environ["POSTGRES_USER"] = POSTGRES_USER
+os.environ["POSTGRES_PASSWORD"] = POSTGRES_PASSWORD
+os.environ["POSTGRES_DATABASE"] = POSTGRES_DATABASE
+
+
+async def initialize_pg_rag(WORKING_DIR, workspace='default'):
+    rag = LightRAG(
+        working_dir=WORKING_DIR,
+        llm_model_func=llm_model_func,
+        llm_model_name=LLM_MODEL_NAME,
+        llm_model_max_async=4,
+        llm_model_max_token_size=32768,
+        enable_llm_cache_for_entity_extract=True,
+        embedding_func=EmbeddingFunc(
+            embedding_dim=EMBED_DIM,
+            max_token_size=EMBED_MAX_TOKEN_SIZE,
+            func=embedding_func
+        ),
+        kv_storage="PGKVStorage",
+        doc_status_storage="PGDocStatusStorage",
+        graph_storage="PGGraphStorage",
+        vector_storage="PGVectorStorage",
+        auto_manage_storages_states=False,
+        vector_db_storage_cls_kwargs={"workspace": workspace}
+    )
+
+    await rag.initialize_storages()
+    await initialize_pipeline_status()
+
+    return rag

From bbc9583d47735029dfa834ac8e342b51864405b7 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 10:27:06 +0800
Subject: [PATCH 16/46] 'commit'

---
 dsLightRag/Test/cookies.txt                    |   1 -
 dsLightRag/Test/out.pdf                        | Bin 19544 -> 0 bytes
 dsLightRag/{Test => WxGzh}/T1_Login.py         |   9 +++++++--
 .../{Test => WxGzh}/T2_GetArticleList.py       |  15 +++++++++++++++
 dsLightRag/{Test => WxGzh}/T3_GetArticle.py    |   0
 dsLightRag/WxGzh/__init__.py                   |   0
 dsLightRag/{Test => WxGzh}/article_urls.txt    |   0
 dsLightRag/WxGzh/cookies.txt                   |  17 +++++++++++++++++
 8 files changed, 39 insertions(+), 3 deletions(-)
 delete mode 100644 dsLightRag/Test/cookies.txt
 delete mode 100644 dsLightRag/Test/out.pdf
 rename dsLightRag/{Test => WxGzh}/T1_Login.py (89%)
 rename dsLightRag/{Test => WxGzh}/T2_GetArticleList.py (89%)
 rename dsLightRag/{Test => WxGzh}/T3_GetArticle.py (100%)
 create mode 100644 dsLightRag/WxGzh/__init__.py
 rename dsLightRag/{Test => WxGzh}/article_urls.txt (100%)
 create mode 100644 dsLightRag/WxGzh/cookies.txt

diff --git a/dsLightRag/Test/cookies.txt b/dsLightRag/Test/cookies.txt
deleted file mode 100644
index 4999d643..00000000
--- a/dsLightRag/Test/cookies.txt
+++ /dev/null
@@ -1 +0,0 @@
-{"_clsk": "1v8cz8t|1752541383487|1|1|mp.weixin.qq.com/weheat-agent/payload/record", "xid": "fff1911b542cde79c5c47a38cb3929c8", "data_bizuin": "3514353238", "slave_user": "gh_4f88a4e194da", "slave_sid": "cDlUaWlaek5RZHV6SUIyVWNNZlJGYTJQdHY5YzUyN29LMG94RlptUV9lbkVDUWxmaTBURFE5YWNKeVRkYlZSdU9VRnNjWXRKN2xfZ2pZd0JWal82aVpsRDhqUnJXQkdYMml4SlhrdGtGY2k2MG95YTlQVEFVanpIR01oZ3p4dldiME9hRE1zcGxZV0FlNTVV", "rand_info": "CAESIPFuk5/nui6QoQ6zEO2B5RfaUmjuQjTJOQVg9mBuI/XG", "data_ticket": "AIy4PwNlFMRBDHcZ7jcXDXf/8fFLl5NS25Nj3tYuDL8H4W8EiURU4G9Dakn7aSUC", "bizuin": "3514353238", "mm_lang": "zh_CN", "slave_bizuin": "3514353238", "uuid": "91eaae9bc5e4f725e03ee2b7e75c8a2c", "ua_id": "bbkG1LsuVI1DszGdAAAAADm2HzejXloc87mSyGEMpdY=", "wxuin": "52541365079710", "_clck": "1l32fbr|1|fxm|0"}
\ No newline at end of file
diff --git a/dsLightRag/Test/out.pdf b/dsLightRag/Test/out.pdf
deleted file mode 100644
index 4aec06ad8c5bbf081626dcc93f2bfe0877c5ab8b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 19544
zcmd6P2{ct*__vgJRfbGu$dwSno$poV;hM>mDKbmuIdhq(gb*^6nauML8VnH$WeCZX
z$`pwxeCJ%0(*OJSertVez1F(UJ<r+C-p{l5Z~ykW=Xv&FJ1>Fbf$<_qVGt<9#_$TM
zfB-3<nz^H;3FP#~CMPMMxV?#iqm4ayM&LqVL2#MCk-&<;68LXJU`t?3U`hZXfD-T$
zzzE>Le>9+R-rmO8$;f0&MHwgvAwU9`u)qhiQ=z$ywS<8q(1yfWI24Y8VqhpJKOBL;
za6_>iP$&l{sgMw<iM26k5n{JxuC^u+KFO<&QmT%CJ}IB7D8$j;3H+h}f$~D(q<lsO
z5Ew5MFkE!&C@P~0F*UGsz?U&_uz>I>+E|<XtRA^ty$;`b14l;_dus@vgo(4ck%@|w
znC^}Zm|Zpi4FD}6F;Em13r9e<T7v8{82fu+I6oGO;D^IdTZN&#y97TC;omU?3gOe#
z(uVLOcrhp__y>WIVqpm0ofGH}vtZC&O{wAgM%3Ec#u4ZaCqqa4xxBfx1u373iIF2j
z2Zn@gR|$b&FeqN2<MFk_p~#<dEM`ZpOUfr^V{Z(Mqz>2)&@7)Q1dorcmW&*MVFa3j
z@TsfFkb?gpr>__|8#ox*o7*~`b+$1#zIYmXk@FmWG=DmZ+GF&212bTF&Kua9SUW=C
zz-1MnS8c2vfl~(v41^{iP%&|^ak4iu0VE(K014h#;1nD};8elH*xW$O=IWNYD1H<K
zi-ha$8W_CZ4#>rAEN$#nZ4Hb}@IIXvSG9#~R|J0j2?D?rRa>(ift0<0D_G@E5aQLP
zR1F~8R&7BO0|7AuShaPEg@C9~!E5f|4CqR*A{^i-z78B3g8orRQ_1iOIA~jies&(-
zS<nX^2!8ZcAV1Ntdk)K+SerRshQPLa6K8G-U>m}RvjnD%go%-ju?g6?gCj7#tw^sX
zHpMDfFu*7R=GSP>5}&caU1B}$f7OD{?esCeB?>o^JDF_#_gJoPtjqSEy?>eURzO*K
zv5DXbb5lK;v?TALi*55O_HV4b+6*jfZ@l2b>gvH8%upgK<2Ew-%_H{x!RK5#dJU86
z$*oS*Qtk6;L%&sV(Qy&gDfIA@4xX;KbHh7`&usC+1ZVlle99yA{lu@9RSyIT$i7^8
zk{@^ba8oMX=a}2LXB*nI-?Yrvk_eb459iMmPSi5?yAO<zG=4W@c^YKU2p>DP5auU%
z?;i?8a9ARuL_ghvW8<NGY*?N)VFG0)8Pw@o&VG3&{f`71fj3FRppWj795g*S#Rs|g
zIJLh->CUQ^GqYb>ZD;22)EG<nTz=iyf?!JBTFZ=F!8fY2QIo=L$s5+oEp_x(o4)Vd
z3va!fENgOBy>Pc&WSCHYx|uzA_LybMU2_G?LyVtQk)<5$3p^yV6jyl0$|$5zUR54n
zX-l3<guDn4$;mwDfA;9f^xA_2YTDUI)sD*GG{PeE`3?lzesq;Qs~fH^6&bH&eNfqs
zA%<A|!X3N0$aDFYhozH^C%ZD@9)>T$>O;t!glI-cNx8J4%?2G*l4Z^5SJYkeF4K%a
zxb!AVJs)SyaNmqkY58)Zd)>ox`MZj;UT$AXb_=JXMub|ESvZ-_<-8eHrk19%oQ)+9
zZlirZl`MC(g7k<>K6IbwMd=gPaAa+=E#H}QaI08lQ}&WgVQ^hQtv-;otLy#n0j}f}
z-<Nmsvfkmdxc<p&a<~gTu$tI90h-uXp1AAf7ehzR(F>S@?75AwZ7#>d5kxPFpMQs2
z0b1O(^x&!G|IP<nlVmp^Y|X77d;l`(U-7|irvCrM2jCR{A7(sw>3^K(KbC`C6CCt>
zhrf0v_%_Y_Zxg)7g<Mfhn(x937c$=S(gEGiA6-1?_Ac`v#a#lQnx}*~TDS8RKK}P$
z(fJXH&(yQeKc#s}Lvrd+HDt{vY5iKBJrOGrALO}8P&g53rU=b->IwUJhJ@+shoML!
zdxwbXfP?I&Zl<vTv9uy~`yF|u?G9>G#fqJHiWWSye?Kd$5v!4SQC+r&?x}ncv!kO(
zu{jNr2|@y&Ad&DtfMjR2g`rS8Ac5~)>He=Ef$v!~fTbFbitR-M{<n(;UT>$<|6?G*
zJF&GG{8$C>p6}SZ4HEcYt^zj{i%AJ#hlfAY#QRTY+;Fj)BJ=S*e-<0$n|L>fgO=EP
z<mI$&&1Ko{l@o;>78)O}Wf51f4BKV9NQ<;NBWYrL+KI;1n%*Nd!k&;EM@R(05%TjY
z{UmOwkII*bT-+&dzEV5Ps%fRZD&itm=#XnZaQ}_PyhN6bcDq?W@23kDoKnVz8G7gI
zQXar2w2F;v<l2Q(*V}GV>iOLM6HxGOfv&<(KiCBRFWCgXhfUzWvI+ce*#xh*)A9c`
zp!|VN@SgA3y91QJWD@`=WRD>f!}HwMXwEuc6@62v9hBuTS@WZp5=PprcAr*=o}JfS
zddf6kCs5T_FizP?Zt3}8{JahGb-ykoU2M-8B6G>6%UbGTPY(P;b%4Z`J#Iv@q2qn<
zQA~AVm9X)GnnDxwx&dn&aUO!{c683P<b3}?UJr)n-JzKaSGfoM*rHTqnn0i^4@E-K
zl6Ya8LCf622e0n)b`$a2ODXUll!AutPzvH-QVL=Zr67K#6vW?B3SMug=l^3!!8-xk
zrt@>%!F#@A?>15pe@!WZhaN)#q-YGCLDtE2d$b=vKzz^GmV!9vt~B>S;?4Tt4ge{S
zb>Fx@n9!>Yml6;zI<gQhUMGIwXhm9eQKBiQGzS;1CD)LVT{`Y=pit+5&$Y)YjteS}
zv9Hjq1fEIG%{j(iWGT6NtyaE;&7>Q5tn-aYQoT9sl78i5odI^n$Y@zEule<f8>VJ{
zsNU(LpKo4nSTrK%^Jf={-HQ~wU!bqBT}b(toPyZHDTrS=1@X6>g4f&W`TrVH{=g}C
z&v)$ILCXINrw|oT#~qhvlqd1^wS%I%;(DbSzk8DJ=9C_gQ^ae=&(BAQkXlS!)Kn~`
zE~BDJQ&oeJQ5m;Wuq?Ji8{`b0y$`0x3~3B8YYM2V!sPX1XeRfY#D&Y{8ux|1;hxvL
zJyI6OSEMtVZ%L&nn=f<A4Ma+pL@MHqYVfL5*`-z3<3m1bA~*hkls|9^@?UZaau25<
zf8`Y9-*O6GZ>Q(~V@Sa}0ot~UQ}CYe*t?As<X>}2EpVGcF|)#5LCaftCr%?ttcXI4
z^iXjG74<@+{^<OjtDK4q!$o)E4Wg2~V#uz#=`(jT1v&>FIF*-~Q<QAYE6qU<P>KN~
z?r9v0*crr=C0>68QKjfAUe8Cv^+7GBxdD`Cbd-hlb>BNF#>aw`LIY3=D#f7oq0aD6
zN=b?gKVY$Z(P)EmR>Ldo6QQ@Ui0@vY;N1dU-F2IS{Fju1+(RkIUnvFox0Hg{+v)iK
z8c_Z~DR|F!?A-y%UsK92i_dHP;={7F_yp1T`g)I~|GxMXe_6OLUgvq>XlB;d;=|2_
zdj>2%IO#Zu$Wz3l@4RbPIKJqrR?nt&me8y#Tmh7a8I*<I3iS*|!JV`(w>&r9S$y0F
z*w28APx7+YV)N-7|Jmp}hIphL{&diXPbBOQNcjV$p#CMLp!QG->Q_oZ{Vk>7^>%vx
zKZX>%6QFIoC<X8Nj=kGRLH#wQ?EdAG@1pQ8e)%|U{qlK|YiMl6-E80&cKoD7{P9c|
zPRj#=U2nD5d}`~0T0o#AI_G-aJ$J8<|5e$H`h2-r*M~c_pO04@dJtn35*$|C{Bh-t
z(wicus2=B;AHRJ3-ipNR1q$9R(A8a(g8G+~g4#nVs9z}s^|zFQ*W2m%{~A#KKq+|7
zckJB(%3o7TF)99jmfM;(L66t!)^p%~cAHYb`&sXi#9fq9hrgd)2JUC8JrAf<WK^G5
zET`_z@#!*Gg_2R7lBa-7#Or&}t)tU8;MVq+?7W|j<-|2YufMv`N?g<7cu_?`A*)7j
zg?D;Mp~>U=FsV(60{=<VC%A?cuf<%OIO<ZT3p<oTc>Re;=pT^s2TDQzOG-iSp%nD5
zl!E?SO2O;x^!$GeDR?J9+jdb3-t!%Mw~>PWYf1qZAL^lz!mFh(`aJrPNeB`$v8Qh#
zWGbSO7Y>tcc#c>g)aVqX?Olc@b?am}9taeZeYo!FDo!YAmG;iR<bH;}08N<QpieO~
zi%|ec<osa=UVo-EI{Kl->e11sW7Pms#O~`vkvl$d)Ktmu4ZoOmuCrz>)3E-rm%mFq
zPLI<luh=c&TPt_)*@V=KpKo4iSTpj&_(c5aSqk1O&{G%+3!(`8ivE|3g5JX@=wBHH
z{kM#Q*W2m${~AvIz$kdnckJE4$-iKf9}nG7`Lkc%{r=FcarZ;FRq&x3Yy^1d)_zxg
zYDw*MR{SCM>=0N){;@zxQd;SVtF&=%CaVmVBz!IA4$Uo}IdNa+FstKB&edy-J=2L%
zdTx{MANNnM-3eJeJV^A$_ztNygVBu)aOt7oC;J%e)A*hBb>a88w?v+%BA$B*1^t6i
zem-==fCRDQ{-sMAa-xdj(n>0q;E=5W{~5l*fW)y?@@JSi#@m!lk>}ceMN%7T2&1c>
z;((#5Sp|e?bDzsT#W0mSCTs38iqV%@Znmh(Ufb`)e8Zmr0|cF+DhD_BagstHJCQ@o
z*31M15+)9g5Wd}(fW^dZoPbap6byWABOI@>V-7y_pycFeX>JV!8ToML_CO_ggMnaz
zyupu?UFsn3?fAVFe-$@yG_bTW1E2Yu0kv#%#gAIIO~hYS0D^Ep?9{<z=PD`QZU+dy
zJMpo?9W(zWVPn9F3mE+~WQy4a<Mz{U%=WzeUqhyNy&Vt#V-peY1ZdkX!p3{PWAFAv
z#B9^j{}v+o<>@!upC;n}`1Cusbn6~)R=w%{fZFd*zngbG{dOG6S=^@s+yk0upPh7@
zt9!qs5Q?9OZRpA1GW6?D?(-ZrnmST#5esbZn7h0tTS)jk8$`VJPDH$0psTy?0Wmw=
zBp?9c+X5sQUIwNEJ|=HtzZGs)G_V4Yrz|ERDk{pYU~Xh@<6vXzsAV8+Vy-TuVq)fG
zX#keAG%#}jV&YpmV))294-AgvMZsVg7#6|<hr)QlZ$PK>^Ya7Q7YHN*1Hr;jyhu1d
z8jAs*Ao9ro)4|+G)Y=TlD1ic1i8>g8L4GuXpBH$-1_N%z@TkWNd0;>-7$^n`Ja7b}
z?gqAiJ!Y4I84H}LI+|ErfFSvKu}~yXHJC~PdIpTC2^@k2=f~~|gBSb?0~io5#{y3x
zfT_F{8pm$m!~Acs$af|7P*Y$2B)C!+-<lrTnhrU|^5oQUX=x_Y0HoMamDB&Q_!F?O
z)J433+@mL9+824!=V1thq~^l;^PHq@xm6!iPg*?k`Es)=mL|?+>fB1lsRJ)J=T_G1
z<)1%rn6I5(a+srl_+cY!gGHW3KX@89N&Ka!$c#zMTtVFB3*WboF031ZEaDCp_uIJc
z+jB%yjbX^XwU_sgre5G{a;B^By~1#=CNZ$K(a@cv;*$C0g42VgQxDVpzTP&hEh)~s
z?|&+a-m<}G_F-m8b1p;ViGTVxd?@;=B`A0w;Zh-rA;i=;M!Uw1bo2BCYg?7`bp9ea
zL7OgAHSnc;KSsuJUy+hU!u_q-qnArLZ%g+Dzvhg{4VDgAa4{_CzA=wG?KQ_DaaCz3
z@MSIu?22Q8eiF{*CR7yOW%+^AZloi%>9HsyooVR#X0EYd<}Rw#SLU|amCsf0sS*8i
z_Qt^2<b5?x^@F7{X}*w8Y#|ZQPji%qGB^v*yEtS!&QX6`%IS;YW2DEpRm6J|3qK*v
zerCd;*+a1=pv%Rh_t5@CThKQGYgsddnUZV=*9+hQkK$cXiU#V7_Xir#aSLxHj1Xg-
zDl$met*cS(N`@8h<vlOqGDwx2rk@da(iy&ZuW;6;E+9>mmPx<iAKNO-Cs7tjTLBO3
z^mTEe<nD7ws|fp?&~sLi?<^i%&Urb@d{SQDDe&Pj%ChQOIqfc#i-YtjtZ=(?2$wlg
zk_~ZHTkol<9wz>!p2i97?<o;Ux0LThoxhW)>2B!eVR`YoW1jxmB)yBr1l^t(wb!^J
zrfcSfYItqz*-KKEUW-vr(TqQZj7c6`vAdn3ohHRQy^k$*^io}!F-`uA!N&c#dMSaH
z@1vjZbUu=j=kqwyGqA5!6T)<l>d^bcy@U+>6e8i1+<~W?DX1>>H45WwCazU>SuWHy
zosJ35jdsgmX?$z6RBUUly##OYaVmcK(Dd=fDl}NIb}Fmkw$PU|9xVBHv?MM?EajBR
zP0>0Utib3}=uUiCc36q7NO%9fWV0@<c`#n3IAT?MJ(RDvVR^!TXnmSN_2|dGqLi^q
z#91%PL|;%;=6QA0IsKE#G(XNM^}3ham*#q-KSii`B{nSGiQ&C$jt+%QPys0)ZHXp*
zL)_c!M-@>2bZJ~TQ3S=&>o3_N)S_fcq<KXxh~iZgvvikzcpLQ>79_(e<$Z4Q(Hu_J
z{xpd^$@+=uB*a^%Y&kIR<eNpfv&llQ@+HkDdK4t?q`gQvNd?zWCbpO5WKSnvsT3OC
zu>aZ^!YQaQ@G{zIqwy1E3ga5vr0*1b)47|zA<OB)eAG-ZIZdZ#6_k$m8uK@?02DOZ
z)127NRN3jvN%K2RC+VqLw6Ac;4pt1NOM72(PFOefA%GV7N(yGY3krFj8$ZeBG+XLN
z!<11zTrO$588x(6@LFJc;nn@Zyk_CcdN-Coa#5?E^wO`ipjgrtBxEb?<@F#D{eF!n
zkchFWkijzaf=7n5lr<TvsPU=A9_g_c?i5uD<fyYmaIu{3eyi5JR|6<2(X8nn<s<#^
zIWf$S%gHo*pBFe4F~7doM=#SFUGm(VMmxo(vQ!UtM4%s4L4ceguJPC1Po9%J-rdyv
z&%1EgxS}yl7R@`(qy0xL`mV+v$Z9@x-?{wxaLLdbqh~=Xg!A*M1F~lO;%@XJ7`zFk
zb-t=z?S>Z$+`F&6*cJzK<*Lz;eP9_G@+<^8hMPNi;6sE$JK5WuFSBW~%9iUTbcwOP
z4)aIGn-t{O6}7Nzohw&!;tWw-NQ@VSUW1lwq1dU!8v-tG-?2VUL^(9K-p@MSQB@P(
zcsD7Yhd$|nFJ-(b4)^w>?7D5OQ@kxhfjm85%<vmS+p@k)RM<Y<@3WP1bis<;?glod
zX|G+|+RK`T%hWYnyaZG|rSn~{xE+Vm2+&df!=p>_=*<Vi7AZ-^BR4~gl>^OXLk<}~
z%6)1(I#+OKK4m|bsc97|bZB&Z758>kC791`-8d?M@ohMR3{I-!#)D3tuh*U!B1zK*
z8izSure(!PWIAXC)or_`6-cQox*2Wsbw`|wNBRRGhtzy;FHv|FgeFI$5zFKW{jFp!
zno1tL&OQVtidUvSMJ7ewx=2jZLd{>zwK@5`p0wh-z}Hl<4l?n2n-W)kZGX&Kw^9Y}
zl=CGkx3S>Nkw&)ecZr0r>CcvzoLqS_$&hA9a5+j+x41M^(1FeIB_&CYl4(nopWONJ
z&)!)JtDzr5-RXKe>9ZH8A6tz)aaurzziKlgvI;A_iIMe_Yel{`3{XOaBCGH1`%1^<
z9h#NDa;O}$IXTZv|88o2o{sk8ypnC!J#?u(a!rGLOd;Uz`mq;oRb&T<zJ-tu=fBwR
zsn~eJy^O3ctw=|%NI8vJw(wEcDdzwp(!q0i7Uw<=BG2<WYWI=(@%Qk0XJEcNhJU=(
zPzV2zlrk{j`uY76o#ebbeFnpKVmTWRpRkOk<<pi>r9Goh&Z7ybp%boVZF}%JGF8i^
z(B@6qD1Yn|R~qx$RK_zEuU#{mRR19%=Wb-^RcyjwJeqoAdTZUg_@*sQn?jF$2^?hQ
z(ICl9B`s*PPyF<9@!=z<r#^FcN#mq7LQi`3J$n+kKIEV_$x=Vy*3jIe<@UVJfpYM~
ziTy-J6z1gFJihiLZZh7g80<&o$|sIW`maL|oG^K1<N0MSE}hCO+0Qk*{8fdsNzv7P
zo$mu<Y0gOD;_nGa!CLiiQZdRu^5o5?_j+%PIrl-g$X}2U(T`@iZRNvNvA;swtVHXs
zK4uoOF&ltP8x{+!c&ub))t;}uR#N^&`D%j6KQ<*f7i+W!HBG2rcR+>&15QOchCy9j
zBvihCT^`jtIMBueWB16Pm6NKv>$IQ2Q%EITmdo9i9;YO&^dfP!Oz|~NxN5^!tE!41
zv!tQE>(%T;adv&jm9KUtw#Oc@uV3uW551ZZFH{CUeV``;bH<J{Kz*P)^FhG7RP)eS
z_Vg)5S<XyvrVFt;`kCV{7x=W*>PoF!QYD>i8V)+jSDd3x$)ob0Sz45IVu`G+kB_>L
zoO0sc!1u9^{zNUOp3KUg!V-sZI7gOLS(Ac-r0#LkY2|R*PO2kUunVv3UYEnCU<=~8
z`%<3Vb)_978!nbF>^JtbIPBT%ynfE^)>8MG8aajO8vWJ>u6a`Q=-XZ=t;DCqJuI)k
zzZnG^3~m(Qjgc2gc1u--y-v!kOcH*da#}n5CSSni+_Bb+5swNiS$vu(d-zoq>MONw
zb=EI1d%X0T2+H8kgSe@<>m}WOt6G#Tc)zydo6HM8zY;z&CWCq6fJdJ=F3_rb5)nHU
zI~*0iKU#yDzTfPm!zwjD>jxtWbdD~#PFQuwgC3H^WKqRuDQ@5G$8N#T?$;m?LF$@P
zaB1_&>E$OCJXEX*?m6!VJZ=}iT&mb7@#r?<+!KapCleCpP`d8wS?<#+7n-}Ms`>1M
zXUw=IYFhM@F;NnfhdR|52f{T{ZlW$bYD*TWWRTztVQCdl_w~1EyjN1e=G(>yJveZ%
znE6tHR>q04?3}~tT3Mr%UmH6En~HGD<59{Ms_)EB_-8M3Ka(ZzxT5*GV=iAef_hk!
zvql8#Ua5P<d%v#vm|CT&wvmF<;<4Lz?P-%=%+FfM+}+6UE^|s1LGbcCcQv8wm_KVt
zB2>g!*96rmgEOvQr}fmmYk6~7q|Yocj`doiCcV$(I9VE>T<C{x8b^O*N^<0whIDXh
zk*QSAauQqb^D%`%24<h?xBNHRr3>4QG-~50-=q_}g)0b`*{!)~S&&nwh%-<4R~t}&
zY=iP+qG#VEdwu%u^zaSIllN>Nbm^bVPG9(d^DfKdI78odp*z0Yi~f4``lZ56$m6=t
z_ck)Dn!k?RHN7guWL%n6GZ4F)=gKQ<mTh%PmLt|Lvg`8HdjgM11rHCDv-{MA=jA>7
zPE%77u^lA-r^MF<>;7nhB%y@$Z7VY7Cim2Wc<oCjS#mkpAfcT+p}`}Uk%2dL9@SQd
z{FKLpgNTY*EeCQx4T-oVS${P4XVO|qxK%s#$h_mDXc9l-6))*>^0$s_*>WMFj%3_^
zzBx8eye9Vx9%AoWT7EX0;^A{ixU!f$O=L8|Je7;4DzU+xa9!OEfeK|;@mz0mzZvAC
zI&+mLKcP5awu++-N$!!3b|&K24Qi4O{tCB~tV<_U|Il$cTDs!AeQvS&(-RHN&8*#e
zqHGsRIQpk%N<-(b(|H}$SO|(T_@<cM@_xFx&aZcWD=q58a=WP94VX6h`lqlpew&9S
z%>kvI8XCeR4eKNtHr>GyCEm=LRty8v?#u`KR|zDFS!`*J6^0I3x|u{73LFnXnVs%K
zhY3D{U-I;v@o@4xk@xY;vNp9(S$*6+58kz0VprIQIrl`}+Df7Kqq&A&dWflFZq_u>
zuH>oaH<=cLWKVjIKha$N+&c{2ko}BWera2ixZc<=DVwC)?C$gRaT;^!wEISvDLR&^
zVJZrOD~`8O3ai>@mhO4!v0!kXW{6n+!XwpiiS-GyFczZ-m$>sRvlrjd&!tEBhA)bn
z@l`_+6cgVc5^(9&tbO$v^2>KbR_SnWoS?5@z6%v<2$}3w756>Y?DvAWzrSF@@~V$#
zWV18k6>JUr02$7i5mQ_wK$gosC1cUXUYv57&^Wy)Fuihgoxg5I96c`PI7F6L?tbXw
ztpstZu@KceJ!aA9n0!m4<`2w$9zs^(wgTy{MrIjmuRhr}g|kWAc=hmf24((3I*aSr
zFxT;}b7r>)Rs5pk8QG%WRS;zF3mn0H>-+q@*_z$+(ThN0@v8i&$1{}0Lbur+h(CW1
z&7Vm_AAY^>lNR^)=ES!f3}0%`Rai~;9}#>UMAodA*fl*-lAiivZJ%xsO_yXZf!i{r
z;B#IdQQ7gU5;(ap6Iw}X3A$9nvE)bm)zux$b*)}SvZ|MBGFrSPLf5~dKkf0FE1!=m
z*KOIHB8;<upl4bAURsMVw0A)`hs(V?`oT<2>cFuWsvB*@>BrQM_DI7C8Pblhtc_lt
zU@vX5Tq@2AUnZXl<UF9RvhiZr=+-;=w&$`pIukC|73N-<>E{`|!Jd!di9G%FTUL7B
zpguj%QfyN*{o&&}QI2Qhgp3A5LKYMFM<ZFqs4j5yDzvvta~HNJG%6LnUe1To&i0Tq
z3_UuGiJM3t@@sjd$uea}D{t&y40U;P|K=pmd#%J<f+!mY9Xsp|qj(oo$`$K?f-HtR
z55MXwy1AK|En*2!Nu^P<8qu*1+++{W8iW}VF0^=rUUqdwKJ0t5SUS}C*^yf$mGVrD
zmsa-Vc<%|_h+}+pO2Zl6U9{%oN2iJ6$dyVsBM7q$KD7!OjAB@LPrHjTe5(m&vYxEJ
zPqWz7Hg35u<nY4T1<@M9>ZwTaYAxS8?q6sWXjiq{<T55X-@Lo)&|LLRK{R!wrD=u#
z$m_U<19{A(EWO+X1&=9!(El16m;DGGF(mq<b@&D@C@DQa@~)-tQ{9zr%m5L|_ydmv
ztw}6i?Vd@BX^uhNK22|P?mjlBlD)$~H6~$o{%X}!1<S%hnhn|1ke&FED)QBNc8w!s
z{(Y@fqI!NCpWk=oci&$TE~<;-xn5q~RN8bq|8{KAGicuQi<XoJhIJA>b2_Er-zqLy
z$%K5cx1J6u7xy-O_RmYZZf}}r8k*$1mw2hQ$w~dk{23gywcppa?=R?TJwyFw+`@RE
zo{meI^~ifCxZROFoii)}6wfYet)Ay$3TcIDiJCYEXg=_mEobaAocZ$V>{<En@VxZ&
z5XLibsT*za@LHr<Tek8jr7Bl5yO^VVS6)SLYU<_jikC0n1uUsaKZ{cp7TS+%YN4HA
zXK!qxp>1v!^BN92&XNZ2Ud%Kx^$RCB27|Mf+Fo(PRT-?czkWUR;8Rf0rw2pt0-`6P
zqu1J&kkU+-#9Jnsn_60wIzmH<<jzbv7teY_)ZWM1u+5b5zmGH`#T4&D>BtcDnrLRO
z1_rJY5ghn>gMPA8=ia^zAD8R~=4&b5m$TPn)R^es67RD=_mJG_9*vO(<02*Z;ZVjB
zg*2zQ(1QuMw8%T{&Bx-s-z45xHmq_QEoh?RC^EbM3^HM_B-b~WR2n)u^(L?NTEI%%
zxRXuvlm5n<!dQ>5uglzf930#WZqOYxnR1*f%p;V3SJv^E>5gs7JwNWWcE^~m`p9lC
z%B&3d^}CKi>=J0ZjJw?V1M1ob$I7V4pY!g!eEICvyn++@!wH?Y6{Cd$C(_s2>|`iz
zztF_+Y9S>a<(6xZzkDgQ=IG8~SwnXbGT9hPVO;l87iu-;SA7W4L(q+I#myNWQch?J
z4IgA)wWhghIVSpG1Tr}&#?P!yzgCDiZ^2U;GFUG^G&j(&fAiYC?v58ldg6}0Lek1y
zf|-3?weL!9zRiI#s9AqTWEPRXd!8|MDs(?P($d?|c&xy)?BfjuhYCTy2SS{IlcfrQ
zH<OSzR-YB{=-*qICHm0)U8LfhI@c-VBR*OYOjT}!=qejvQMYWKx=Btx=%|aWE1a14
zJcRSG9H*Y=5`zt%kW-0ZkMV$hYYOlZ;k~y2|7dt-`5AqN9%jg}fv=Y<jNW8K@`eoS
zdlXK4)N}5AnJ?dLznsQd?xg2aF87DuMOS1?vT?^Bal99JAqlHEj#$)kUPc~EW1S~-
zlh>>`rYGy#D^n_*zcxQS9CCj6GhZx|@Y=ICay<=1lG=G3FBER&roTeib5xRb^(u@w
zMr|Ovq5NNIo;p=8tjfs0L?DROgE#9}vu(pwGs-`_P$HLF=VVcrj>#GpozBtcBcE{@
zUQ~TZwxssGylv2OG~wN>(e32In~zT2J{CTxS4vc_S1*fU>ApRaz@x1s@8=}DCam<T
zk~sX@{0UpZ`q4p>hC=jQ<o8t3^7{i6jF#za84l;3b7T;BdbW}gEe%cCb6JJU3f?Qb
zSiXME`NO$yZ*(h=P)F0}B`F-4rP|QVZzK6xN@n@<0>XJFzKGgU(ftrAy-)L6-OV2d
z0?iVlZq#w;>0;pKtE*|3%oIq|2ajS3@;l^{*Ar4Iy~U#EBcAY``v*3j^f1HlMoPgj
z;`F!Cg0`X{yBzh__;U9vBkqOHodrggbhY1?ot_?<D%XhXNPWrO-8Hy)iG0jBWzdD2
z&S>;@;_@MVWtPv40#F;(TteS(bAHvym*}3oO`)15N(qQ}I555DDz;LMK&W(Q<j-7{
zY|d_ujxsd8oALpNG=2YpYdSnh@CB)08V^;z{N>Vu%Kf@L9fCMPZ9!99X-ZS*G`cg4
zNt~GTqI(@qvn0W=C0?fmjZ$(VJrQX!Tx>{G#wXP<YAvVQ9CFmI`AXt7KK2ibR*4*$
z4RMqdRW(9)-8gTZ>=<!*^>CGj@{^q`@-uaVoq^B{Tg6;W)A!>Ok2R|x+LF!&QAAg&
z(H|zVZJM)b{k~u?2rjN?u-1ylEHrz331=Xu%JlG*teJbs>D5_sZS&wlC5F1&cP5wH
zvBXS&kuWaors6c&=$GVnmlMg=n;9qg$X(8tCkhQ4zH&1QxZ~?LsgY2qXI1ah#iQ&d
zJpST!Y@K_)-H_CUfyYI?^6XEl^ClQP1HG0vtqu4#9x(kA{HRCSA^u1S*R$KbYLnuj
zqgJu1vEGkp8Ozk@#CW=WwVLi;B5R#F&~C;ZTlzG+=vL$r-h`L!0gw^b_+p!Of5aCr
z-M1SG(+=~Mz2%2_IIRTN*$WeN8q7_m@695X!$tFDpT0bOu&&g23Uk{gQJvA!wDvRk
zr<A^jWd3F$Z;}L0kJ5QP+W01{LvR62Wt&*iaA@lEcdaAJ)#-!Fua~)QO~f4BkPMBP
zI5BB_G~}-6dnW;@lZP`i8e@bhAFL64la0Q`!nkmcB$DK;-<1PAkD&*c4+s-KpE^vF
ze9E8Xcu?&lDL-uU;pY0Iey@UvA&o2>Bt8sd4P%HyZj&qdBu)KOV{utdiG9Px7&q=Q
z|Co2?_D59ik<@2JJHMz~;<zWP37Mg^4)JZYwDT293({azU*pQ+(zx3~s$z0~BXuqG
z)@$?79``FTXSM3i*Qu}jtQk$6naX(HqAPKm_O@I>Qo($!a^;yyU-(q23~E!bl_m1r
zro1EP6`>%)&3r$K;~J2;>pbDy#7p-lyo8UtL@g-83z}Cj(mwl%j~*~34v!^8Fja}*
z(&7i~>>eH?I)3;ZQ&rS~#|W~Ek{2EdGgeXcM&DwpN~pS3?`8H;{#&mL%ZF?_=b@M|
zN@l0G){jXS5X5te_DM5lwkJexAIfU_5EoWU_TFWxs%OM_$hT)~DLKr7a>a2?thw%B
z*h*~Qs(Q2M!KIO=soWU2Q1ne@_tvV-Yp^E=--<4Ng&Zfpy>Kl2+`<Ud{?$0+RpAuM
z`6<NMQ?oDZ#Ni%uE#Fqds-h&aJI{Xd5@LPRY1UXwNL%mxcHIN_Af+>C<@50cFQIcs
z-sblZ2vq?#%#J&t&DpKeBcme^Hnz!Ns_t2}vnWh3L`j`3Y6w;vb79D#K0%R8tw*g0
z5#VE_BD>|Q$<d^bqY}rl(n^;|Gvz#t>_}(2M{h<PrRKU$yIOnL{Ue3N<Du})jh<^M
zB2l59sC3$q@@M&69v4R6SAW&zyxAUUw$d?~W<A1BB-J|_KmEz-NJWp`#Vil&dPRuL
z6VKdF#!QO?2@|Rn=bktBrG(jVZ5BRXl8SyVvId{Lbs&lmE}E?FJ9RmtdS#@Y<blz1
zq-&=$@$mIHo==I`HS*q?@}rlgglE+`RMi7su*ms;&QX3-eB#^J!yB2j?wgvKDEnHf
zjMtbN!jv2Rytk^KRnv$rGI6Ut%c|_OfDB`EdyBpAe6JVFjFPL=PS)YquRA!y>Cm1W
zh!v)}t&@MoLQ(YF?AZ{(<Jsh0n!z}h!c^(1JdW=OTmSHpq#K33r&oq^1aD1#*&mrK
zv|jQVHm2m{RWhW(Hl3T4JTmVRO81~rA#+98V$r+snRR2eshYAutEZ%Y>9f+#sZ~?v
z=bw`dvfb+IQ+w2+Rny)iAj(<hD?`*#Im%1D(nsUg*;+mie|?(6GjMa(&3!!LNTF$<
z(yc4%s-3(ftu9=sm&uli#txI%`Avu9&&QS3D$>Z(QDmhx($R#Q5|BC<d)~(^E7Fa%
zCMn9S(;QPOD=B#@o(zF&7v6TVGd4ABN{h{wEWiHpjQ5^UtHjfa$9;#Rtx_3Ml1dn7
zRmujR^YA-EXipbjV*6If71nya>LhdD$w9lyylI6+ULI`0b6Dx)Vq8p`E_>n7$)2v$
z7FPntEhiW0J<q<hz2i*Omi9Hw*>%cPOUe4n7p?510OidX&&sE>vWZW=SVp(=)vlDz
z4^=)(qJG)x3|D%mBH55M+*<5PCcFBcG(A%_R(F!wKDpC8qL>ovbxNo@i;G!b-KD@<
z_{k=>jZMDZfQ_$JiSTp;n}(eZb<HiG`eASHg0K6|KIAO!>1LVlqhnZIbJpfAHcOUw
zEuBf)#B^}iD%_vZOC4CS%4!RDX{+er?3~D|W0U)K*2CC$rfc(})GTF5D@(l2{gnZ!
zB;RKj-M_j%Gf^Bg`L@|YEV0}(A_Q^Zyk(VI=y_ace`!MQ;Nmg&=wn|`e4yOlc-by0
zj)i^9WyXpdHTvC>5u*_+K~*o)>|#~YIePz1Y;e1mhd5#-*D4;BGdX_c<wUsrWaooV
z&4msz>kb=d-1;{7;vajrkytY;Y>+Q~bX?pU=f%e<!B{2+j{UKD1^YAOVt1f-*T!#g
z83`~z3I>7Uz(#HZ2NV28W4`|}+zTkEx;g-1RvBwk8(`lt1V|q+GY9r>x<XEi0{K8D
zoDe=Ga67oUwHf5Jj4=?#HFtCcgsM)qww5MVU|1Il0rxci8gho<gVI1Y0?>e(jg*Xp
zf&s9-dPkp>PXpYqt%CwWvlzguKYif9R_@>BpiGw(*wGE_fdj9R0vo?{kU%CP_yGlQ
zAc(x9v|A3!cAtaaVc0Fb9o-$h-;cVaySI~L!Q_suATwkqsEh%2>jPb|z3UZpS;pEN
zNJRl&ieRK_;s`M{w>Gxl-pCFy1p3#S6xb7MY!2-7-a5noH?lIY#rN007!Y-F)xBc@
zF*Y#;Dgt_ZE98$KN1mO&=K*wnmcXYA0JZ=cuq|W2UE&Vl4taY6YhZ)E0Qj~5A&3AD
z_`_$={B|9_rDbUPYw>L@`~hDF45$MNMF3Ul2=JpYIN%hGhNFN3QW7o!9I(Ju{25df
zyoQ4zfdi0iAPyWb{3uEMH3@$38XN_dL}SFzzyT-%s^f4_un7zXe?UrNK|K_lUko^)
zQ8=(pj2IGBM@sU8Rim+RPyve*#{dTu0*Nnzf`OJ{C9t4nfJNZ7Ewh2jpkRAYC>-n$
z6b=oZp%5rg4=4iPI)Hj8NesRP3>3R1+;$!>L`iN}y?t$49rOsOWvfnp&<GS9wq-mD
zv>FQ)16x4B(4Y{7z_-qiMgW!JcY*E<0FXR{2hxw>H*&HE->`s38y*zkU<1icKLBZK
zV+-;R{<D{ez%-g2xc|&8*+n^^5P0*(UvQ12HGmzEM}F`CkMvF+h_L~X)&Ve=14IXy
zsz6=`kVT?qqi$^u!VQ9-bSghnuzt=#pwy4e;|`>Ja^}Vk5K<k|tyC!dCUX^1{5E3X
zuqDLr08}AGY@Olv?x>LBcV?)N;?wSd!`3x?#;nR#LZk{QW-HwUuxl%$42A&Gy|yPf
z@EQ>b96-q?SAkhCV+ACEiESUncXP*8dtjPDffrPeg7aeQ4}wMk7=oBWe#(G!9`Fxh
zy)A=bfSj4%WKbv!OsoB+9DqzPDf>4W8r-7#iwuTAf~n-c$N*h{TYr}!!0!329O4h<
zkl=oe-^%fWS#Q6}U}z+ek@l+$iUBvV{3hcE$KzKS3=QTW{wCuGlU09}!D0M+Y=L30
zd)k2gp$#}3wnrC^f&uw{zqRoPn}PS({NB${IFkPlx?n!+uk|6IdwoM7pnJv_j^*Ej
z0|f9=nmzR)!2RpLwSflri2o+TqW0<{_x3&F5BNi1!7RaF>qEl8OuXM@NbprIzse9m
z;@IypB(U#)j|>TJLj6@2i2?U!|0d%H)3|?=0n=)a3<LdxE_RPD3Jwy^ul1o&K=Rw~
zG8l9ZUQlo_srq-_z1Tou!EKYjmO}%tMEYHZLW4B)YdI8lFHX_Wz1Tn__TnB~bbqf8
zfX1G_LL>I#6ph}C4KzQv3Gr9^FtEMYKx6h|91XnvX|FE-9$W#_ZI8_u3`pd^wTpo5
z8D9)?FD9`t=$<hGUX8T3K7MeO_^k~L|6W~w{yo^hA^`^fO&1F+*L!4e@O=)ywt+=}
zsf@qLfOTSzjDL^6z^VYgZsiwU7#h4m{aps%jTc9I10bi;9{(aRRdY9B#YF;HoZwFa
l0Fd~kVjCMEEgG<NE926^(ZJpjznlR>4U}bLlT^Zy{vXccD%}78

diff --git a/dsLightRag/Test/T1_Login.py b/dsLightRag/WxGzh/T1_Login.py
similarity index 89%
rename from dsLightRag/Test/T1_Login.py
rename to dsLightRag/WxGzh/T1_Login.py
index e313e8ab..83eb3e0d 100644
--- a/dsLightRag/Test/T1_Login.py
+++ b/dsLightRag/WxGzh/T1_Login.py
@@ -7,6 +7,8 @@
 import json
 import logging
 
+from torch.distributed.elastic.timer import expires
+
 """
 # 查看selenium版本
 pip show selenium
@@ -55,18 +57,21 @@ if __name__ == '__main__':
     driver.get('https://mp.weixin.qq.com/')
     # 获取cookies
     cookie_items = driver.get_cookies()
+    expiry=-1
     # 获取到的cookies是列表形式，将cookies转成json形式并存入本地名为cookie的文本中
     for cookie_item in cookie_items:
         cookies[cookie_item['name']] = cookie_item['value']
+        if('expiry' in cookie_item and cookie_item['expiry'] > expiry):
+            expiry = cookie_item['expiry']
 
     if "slave_sid" not in cookies:
         logging.info("登录公众号失败，获取cookie失败")
         exit()
-    # cookies = json.dumps(post)  # 注释掉这一行
 
     # 将cookies写入文件
+    cookies["expiry"] = expiry
     with open('cookies.txt', mode='w', encoding="utf-8") as f:
-        f.write(json.dumps(cookies))
+        f.write(json.dumps(cookies, indent=4, ensure_ascii=False))
     # 关闭浏览器
     driver.quit()
     # 输出提示
diff --git a/dsLightRag/Test/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
similarity index 89%
rename from dsLightRag/Test/T2_GetArticleList.py
rename to dsLightRag/WxGzh/T2_GetArticleList.py
index bbf4318f..f06849b4 100644
--- a/dsLightRag/Test/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -37,6 +37,21 @@ if __name__ == '__main__':
         content = f.read()
     # 使用json还原为json对象
     cookies = json.loads(content)
+    # "expiry": 1787106233
+    # 检查是否有过期时间
+    expiry=cookies["expiry"]
+    if expiry:
+        # 换算出过期时间
+        expiry_time = time.localtime(expiry)
+        expiry_date = time.strftime("%Y-%m-%d %H:%M:%S", expiry_time)
+        print("cookies的过期时间一般是4天，cookies过期时间：", expiry_date)
+        # 获取当前时间戳
+        current_timestamp = time.time()
+        # 检查是否已过期
+        if current_timestamp > expiry:
+            print("Cookie已过期")
+            exit()
+
     options = Options()
     options.add_argument('-headless')  # 无头参数，调试时可以注释掉
     # 设置headers - 使用微信内置浏览器的User-Agent
diff --git a/dsLightRag/Test/T3_GetArticle.py b/dsLightRag/WxGzh/T3_GetArticle.py
similarity index 100%
rename from dsLightRag/Test/T3_GetArticle.py
rename to dsLightRag/WxGzh/T3_GetArticle.py
diff --git a/dsLightRag/WxGzh/__init__.py b/dsLightRag/WxGzh/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/dsLightRag/Test/article_urls.txt b/dsLightRag/WxGzh/article_urls.txt
similarity index 100%
rename from dsLightRag/Test/article_urls.txt
rename to dsLightRag/WxGzh/article_urls.txt
diff --git a/dsLightRag/WxGzh/cookies.txt b/dsLightRag/WxGzh/cookies.txt
new file mode 100644
index 00000000..7183301e
--- /dev/null
+++ b/dsLightRag/WxGzh/cookies.txt
@@ -0,0 +1,17 @@
+{
+    "_clsk": "2gtve8|1752546228205|1|1|mp.weixin.qq.com/weheat-agent/payload/record",
+    "xid": "16332bed01be1055e236ad45b33af8df",
+    "data_bizuin": "3514353238",
+    "slave_user": "gh_4f88a4e194da",
+    "slave_sid": "QzBRX1FWTXNMaEdJYnc4ODBaM3FJU3RRbjVJNFE2N2IzMXFyVGlRQ0V5YklvNGFOc3NBWHdjV2J5OVg5U0JBVXdfdGhSU3lObXRheG1TdFUyXzVFcTFYS3E1NTh2aTlnSlBOOUluMUljUnBkYktjeUJDM216WVJNYzJKQkx2eW9Ib1duUk1yWXI3RndTa2dK",
+    "rand_info": "CAESIFwUSYus3XR5tFa1+b5ytJeuGAQS02d07zNBJNfi+Ftk",
+    "data_ticket": "9gQ088/vC7+jqxfFxBKS2aRx/JjmzJt+8HyuDLJtQBgpVej1hfSG1A0FQKWBbHQh",
+    "bizuin": "3514353238",
+    "mm_lang": "zh_CN",
+    "slave_bizuin": "3514353238",
+    "uuid": "8c5dc8e06af66d00a4b8e8596c8662eb",
+    "ua_id": "y1HZNMSzYCWuaUJDAAAAAApPVJ0a_arX_A5zqoUh6P8=",
+    "wxuin": "52546211515015",
+    "_clck": "msq32d|1|fxm|0",
+    "expiry": 1787106233
+}
\ No newline at end of file

From 36f57caaee2e54f7894bf4301b2b0f206429756f Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 10:34:58 +0800
Subject: [PATCH 17/46] 'commit'

---
 dsLightRag/WxGzh/T2_GetArticleList.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index f06849b4..0c40c584 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -39,7 +39,7 @@ if __name__ == '__main__':
     cookies = json.loads(content)
     # "expiry": 1787106233
     # 检查是否有过期时间
-    expiry=cookies["expiry"]
+    expiry = cookies["expiry"]
     if expiry:
         # 换算出过期时间
         expiry_time = time.localtime(expiry)
@@ -51,6 +51,8 @@ if __name__ == '__main__':
         if current_timestamp > expiry:
             print("Cookie已过期")
             exit()
+    # 移除expiry属性
+    del cookies["expiry"]
 
     options = Options()
     options.add_argument('-headless')  # 无头参数，调试时可以注释掉
@@ -80,7 +82,8 @@ if __name__ == '__main__':
             logging.info("微信token:" + token)
 
     article_urls = []
-    gzlist = [{"account_name": "长春教育八卦阵", "account_id": "jybg100"}]
+    gzlist = [{"account_name": "长春教育八卦阵", "account_id": "jybg100"},
+              {"account_name": "致知物理", "account_id": "zhizhiphysics"}]
     for item in gzlist:
         account_name = item["account_name"]
         account_id = item["account_id"]
@@ -147,7 +150,7 @@ if __name__ == '__main__':
     # 将返回的地址写入到文件
     with open('article_urls.txt', 'w', encoding='utf-8') as f:
         for record in article_urls:
-            f.write(record['title']+" "+record['publish_time']+" "+record['url'] + '\n')
+            f.write(record['title'] + " " + record['publish_time'] + " " + record['url'] + '\n')
 
         # 关闭浏览器
     driver.quit()

From bf4ce692d7a629668d7a0ddfa37c128ba92b4c9e Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 10:35:45 +0800
Subject: [PATCH 18/46] 'commit'

---
 dsLightRag/WxGzh/article_urls.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/dsLightRag/WxGzh/article_urls.txt b/dsLightRag/WxGzh/article_urls.txt
index c8d57b34..06ea3b09 100644
--- a/dsLightRag/WxGzh/article_urls.txt
+++ b/dsLightRag/WxGzh/article_urls.txt
@@ -9,3 +9,9 @@
 长春市第十九中学2025年职称评聘拟通过人员名单的公示！有你认识的老师吗？ 2025-07-11 15:22:13 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526176&idx=2&sn=693d13964e4be18718c0eb4fd13ba68f&chksm=84e1abb2b39622a442a9f7cea8ddc72820050b2896968f2d0ae283c7caca2dbe014a721feb2e#rd
 高分喜报频传！长春这所小学靠啥成为“学霸制造机”？ 2025-07-10 19:00:00 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526158&idx=1&sn=a0af7f484d6a3300a9b7f3d787a2594d&chksm=84e1ab9cb396228a56420696eb09071ff829d58e8e31bd652f849f3cbd0ee276b0baad7a1e89#rd
 蝉联冠军！吉大尚德游泳队斩获骄人成绩！ 2025-07-10 19:00:00 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526158&idx=2&sn=cabb682e99978bf2a58ff0e9e06dc53d&chksm=84e1ab9cb396228a5cd457cd7ee0728491e6b3dc34fbde02240624364cfa8a9e2c533052d2b4#rd
+明日（16日）公布高中一批次录取结果，查询通道在此，请收好！ 2025-07-15 08:38:04 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546664&idx=1&sn=431c1f89b968ddff2165466ce20b2976&chksm=feb6a485c9c12d930da2340a813d24d5dd168688af162b4e7bdcd42d5c31d832ef9dc915b1dc#rd
+长春2024-2025九上试卷合集（赠答案） 2025-07-15 08:38:04 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546664&idx=2&sn=ffd3261777f03cdfcd78e7a899f25778&chksm=feb6a485c9c12d93a16db0ce8d9850cc843b22442b5d49850f93c99efbbc160855ff1a2314cf#rd
+网传各初中2025中考最高分准吗？ 2025-07-13 09:22:08 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546662&idx=1&sn=5c8a0cc82f0aab69a600d06b6e63a57f&chksm=feb6a48bc9c12d9da8ed3b2a19d12fa275f83796201448996bdfa2204bf41f7d826430ae30be#rd
+长春市2025年中考各批次录取最低控制线确定 2025-07-12 10:05:49 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546644&idx=1&sn=ea3371033b95e7203e881947c980a8a4&chksm=feb6a4b9c9c12daf64cbd87239cd2fdc22a93e6e0d2555ce3c5b66bc0e96028bfc3565021201#rd
+长春市2025年中考成绩将于7月12日12时公布 2025-07-11 15:13:54 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546642&idx=1&sn=718e0fa8463273260dae093e0686b7e0&chksm=feb6a4bfc9c12da9ab59ff8d7da3caefbb44d8195bb694b31cd5b3cab8fc6505b20c61a0ae62#rd
+长春2024-2025九上试卷合集（赠答案） 2025-07-09 10:56:48 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546544&idx=1&sn=d07bf1b38403c0578ad67ae007ce6159&chksm=feb6a51dc9c12c0b9b90a2131a9ba913b92ed2eab3dcaa78fadccb6b231e4f5cb4247750f910#rd

From 352d2d71a4f582c119001bdf47fc75c47366fde3 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:00:49 +0800
Subject: [PATCH 19/46] 'commit'

---
 .../Config/__pycache__/Config.cpython-310.pyc | Bin 911 -> 911 bytes
 .../PostgreSQLUtil.cpython-310.pyc            | Bin 1565 -> 1565 bytes
 dsLightRag/WxGzh/T2_GetArticleList.py         |  21 ++++++++++++++++--
 dsLightRag/WxGzh/article_urls.txt             |  12 +++++-----
 4 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/dsLightRag/Config/__pycache__/Config.cpython-310.pyc b/dsLightRag/Config/__pycache__/Config.cpython-310.pyc
index 76e8086d4dbc8bb523dc4cfcd66e1f5ab1971f11..d08d90dfe3ea0cfe56c5b74d7d02171c7f2b11d0 100644
GIT binary patch
delta 220
zcmeBY?`P-E=jG*M0D|S-B^$Z#GS){i7bT|qX^KR#Ir>D#J9<OtVDBhyA0OX%#{kcG
zZ`a5u9uUXLG1xUeG{`5455)8JcX9QJ_jB}hjS_J6b#iq<7U6}6z_jy0xNrkFA$%84
zU!y1ysAk8A_z-_@SHJjR&nVX@PCr+F6EC1`o>6>IX0U4z*tW?tnWPz`CU0d*Wn%|=
Lq=<d80&_k9PToB7

delta 220
zcmeBY?`P-E=jG*M00Mu$;*H#Q8S8H`7bT|qX^KR#Ir>D#J9<OtVDDSpK0dzjjsc$W
z-ma0ict9K{$6(j^&>){%d?22$zl*C+yq}}5>n#CSUnf@=WD#D72uwR4gbO!-6T)}#
z^fkIA0@dso5g+33?dlgF>>1^Hi__25-^2@Oo98V)C^OhK2yENrnM~4*w<d38N@WuP
MdZb8XvI28H0A#d6_5c6?

diff --git a/dsLightRag/Util/__pycache__/PostgreSQLUtil.cpython-310.pyc b/dsLightRag/Util/__pycache__/PostgreSQLUtil.cpython-310.pyc
index aada6b85691079cb14517057f6b2b6d02d299487..a95beed5195ce35f6b1c02e2ef633d03e771ea10 100644
GIT binary patch
delta 134
zcmbQsGna=upO=@50SFoy3pa9qU}lV({D--hQF?M0%Vb8`$#Sf+0a3ip`FUxX>3R?<
ziZ{SNIK(~3H8|b_$Ux!-_y-|#LxWv|q6FcJ0~~{c!~KI?q9%8<DzQlett^tAyqPr%
E01N~skN^Mx

delta 134
zcmbQsGna=upO=@50SHcgE7-{Wftm5v<Uh>4j53qESSB;dO_pPo4Y<YYoS&DLnXU(+
zZt({A2Zy)^xdz9302xT!0RJFlZfLM;&@Dl@;sD3s;BfySms^v&S(VsifL0dCP2S9!
F1ptK9D^UOd

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index 0c40c584..a9b9ae09 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -12,7 +12,18 @@ import random
 import re
 
 import requests
+import asyncio
+from Util.PostgreSQLUtil import init_postgres_pool
 
+async def get_wechat_sources():
+    """Fetch every WeChat official account (account_id, account_name) from t_wechat_source as a list of dicts."""
+    pool = await init_postgres_pool()  # acquired before `try` so `finally` never sees an unbound `pool`
+    try:
+        async with pool.acquire() as conn:
+            rows = await conn.fetch('SELECT account_id, account_name FROM t_wechat_source')
+            return [dict(row) for row in rows]
+    finally:
+        await pool.close()  # release the pool on both success and failure
 """
 # 查看selenium版本
 pip show selenium
@@ -82,8 +93,14 @@ if __name__ == '__main__':
             logging.info("微信token:" + token)
 
     article_urls = []
-    gzlist = [{"account_name": "长春教育八卦阵", "account_id": "jybg100"},
-              {"account_name": "致知物理", "account_id": "zhizhiphysics"}]
+    # 替换硬编码的gzlist
+    loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(loop)
+    try:
+        gzlist = loop.run_until_complete(get_wechat_sources())
+    finally:
+        loop.close()
+
     for item in gzlist:
         account_name = item["account_name"]
         account_id = item["account_id"]
diff --git a/dsLightRag/WxGzh/article_urls.txt b/dsLightRag/WxGzh/article_urls.txt
index 06ea3b09..7e5eccc0 100644
--- a/dsLightRag/WxGzh/article_urls.txt
+++ b/dsLightRag/WxGzh/article_urls.txt
@@ -1,3 +1,9 @@
+明日（16日）公布高中一批次录取结果，查询通道在此，请收好！ 2025-07-15 08:38:04 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546664&idx=1&sn=431c1f89b968ddff2165466ce20b2976&chksm=feb6a485c9c12d930da2340a813d24d5dd168688af162b4e7bdcd42d5c31d832ef9dc915b1dc#rd
+长春2024-2025九上试卷合集（赠答案） 2025-07-15 08:38:04 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546664&idx=2&sn=ffd3261777f03cdfcd78e7a899f25778&chksm=feb6a485c9c12d93a16db0ce8d9850cc843b22442b5d49850f93c99efbbc160855ff1a2314cf#rd
+网传各初中2025中考最高分准吗？ 2025-07-13 09:22:08 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546662&idx=1&sn=5c8a0cc82f0aab69a600d06b6e63a57f&chksm=feb6a48bc9c12d9da8ed3b2a19d12fa275f83796201448996bdfa2204bf41f7d826430ae30be#rd
+长春市2025年中考各批次录取最低控制线确定 2025-07-12 10:05:49 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546644&idx=1&sn=ea3371033b95e7203e881947c980a8a4&chksm=feb6a4b9c9c12daf64cbd87239cd2fdc22a93e6e0d2555ce3c5b66bc0e96028bfc3565021201#rd
+长春市2025年中考成绩将于7月12日12时公布 2025-07-11 15:13:54 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546642&idx=1&sn=718e0fa8463273260dae093e0686b7e0&chksm=feb6a4bfc9c12da9ab59ff8d7da3caefbb44d8195bb694b31cd5b3cab8fc6505b20c61a0ae62#rd
+长春2024-2025九上试卷合集（赠答案） 2025-07-09 10:56:48 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546544&idx=1&sn=d07bf1b38403c0578ad67ae007ce6159&chksm=feb6a51dc9c12c0b9b90a2131a9ba913b92ed2eab3dcaa78fadccb6b231e4f5cb4247750f910#rd
 长春中考上演“神仙打架”！省二力旺等五校过半考生超700分！ 2025-07-14 18:36:34 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd
 独家专访赫行学校2025年中考“双黄蛋”！学霸靠啥杀出重围？ 2025-07-14 18:36:34 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=2&sn=c7733f7c2c6331e51e55af695f99a43e&chksm=84e1ab0cb396221a7d185dcb99acc9dce45cc5c66c3eef42680a215b710bb9bfa9fd10da4419#rd
 长春40所学校中考成绩曝光！700+成批涌现！谁是最大黑马？ 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=1&sn=1f314640ae6eec236b0e16271bd44362&chksm=84e1ab1db396220b73ae08898a026d887436501a6c42abe01d7fa4aef9063533fad89720d3b8#rd
@@ -9,9 +15,3 @@
 长春市第十九中学2025年职称评聘拟通过人员名单的公示！有你认识的老师吗？ 2025-07-11 15:22:13 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526176&idx=2&sn=693d13964e4be18718c0eb4fd13ba68f&chksm=84e1abb2b39622a442a9f7cea8ddc72820050b2896968f2d0ae283c7caca2dbe014a721feb2e#rd
 高分喜报频传！长春这所小学靠啥成为“学霸制造机”？ 2025-07-10 19:00:00 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526158&idx=1&sn=a0af7f484d6a3300a9b7f3d787a2594d&chksm=84e1ab9cb396228a56420696eb09071ff829d58e8e31bd652f849f3cbd0ee276b0baad7a1e89#rd
 蝉联冠军！吉大尚德游泳队斩获骄人成绩！ 2025-07-10 19:00:00 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526158&idx=2&sn=cabb682e99978bf2a58ff0e9e06dc53d&chksm=84e1ab9cb396228a5cd457cd7ee0728491e6b3dc34fbde02240624364cfa8a9e2c533052d2b4#rd
-明日（16日）公布高中一批次录取结果，查询通道在此，请收好！ 2025-07-15 08:38:04 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546664&idx=1&sn=431c1f89b968ddff2165466ce20b2976&chksm=feb6a485c9c12d930da2340a813d24d5dd168688af162b4e7bdcd42d5c31d832ef9dc915b1dc#rd
-长春2024-2025九上试卷合集（赠答案） 2025-07-15 08:38:04 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546664&idx=2&sn=ffd3261777f03cdfcd78e7a899f25778&chksm=feb6a485c9c12d93a16db0ce8d9850cc843b22442b5d49850f93c99efbbc160855ff1a2314cf#rd
-网传各初中2025中考最高分准吗？ 2025-07-13 09:22:08 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546662&idx=1&sn=5c8a0cc82f0aab69a600d06b6e63a57f&chksm=feb6a48bc9c12d9da8ed3b2a19d12fa275f83796201448996bdfa2204bf41f7d826430ae30be#rd
-长春市2025年中考各批次录取最低控制线确定 2025-07-12 10:05:49 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546644&idx=1&sn=ea3371033b95e7203e881947c980a8a4&chksm=feb6a4b9c9c12daf64cbd87239cd2fdc22a93e6e0d2555ce3c5b66bc0e96028bfc3565021201#rd
-长春市2025年中考成绩将于7月12日12时公布 2025-07-11 15:13:54 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546642&idx=1&sn=718e0fa8463273260dae093e0686b7e0&chksm=feb6a4bfc9c12da9ab59ff8d7da3caefbb44d8195bb694b31cd5b3cab8fc6505b20c61a0ae62#rd
-长春2024-2025九上试卷合集（赠答案） 2025-07-09 10:56:48 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546544&idx=1&sn=d07bf1b38403c0578ad67ae007ce6159&chksm=feb6a51dc9c12c0b9b90a2131a9ba913b92ed2eab3dcaa78fadccb6b231e4f5cb4247750f910#rd

From af3f8098c4567cd48bf630a6c535fe4d313a2576 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:04:40 +0800
Subject: [PATCH 20/46] 'commit'

---
 dsLightRag/WxGzh/T3_GetArticle.py  | 41 ---------------------------
 dsLightRag/WxGzh/Util/WxGzhUtil.py | 45 ++++++++++++++++++++++++++++++
 dsLightRag/WxGzh/Util/__init__.py  |  0
 3 files changed, 45 insertions(+), 41 deletions(-)
 delete mode 100644 dsLightRag/WxGzh/T3_GetArticle.py
 create mode 100644 dsLightRag/WxGzh/Util/WxGzhUtil.py
 create mode 100644 dsLightRag/WxGzh/Util/__init__.py

diff --git a/dsLightRag/WxGzh/T3_GetArticle.py b/dsLightRag/WxGzh/T3_GetArticle.py
deleted file mode 100644
index 2f9c9e27..00000000
--- a/dsLightRag/WxGzh/T3_GetArticle.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.chrome.service import Service as ChromeService
-from selenium.webdriver.common.by import By
-
-url = 'http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd'
-
-options = Options()
-options.add_argument('-headless')  # 无头参数，调试时可以注释掉
-service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
-driver = webdriver.Chrome(service=service, options=options)
-driver.get(url)
-# 可以只要txt
-html_content = driver.find_element(By.CLASS_NAME, "rich_media").text
-# 第一行是标题，分离出来
-title = html_content.split('\n')[0]
-print(title)
-
-# 按行遍历html_content，当发现空行时，删除空行前面的内容，只保留后面的内容
-lines = html_content.split('\n')
-content_after_empty_line = ""
-found_empty_line = False
-
-for line in lines:
-    if not found_empty_line and line.strip() == "":
-        # 找到第一个空行
-        found_empty_line = True
-        continue
-
-    if found_empty_line:
-        # 空行后的内容添加到结果中
-        content_after_empty_line += line + "\n"
-
-# 如果没有找到空行，保留原始内容
-if not found_empty_line:
-    content_after_empty_line = html_content
-
-content_after_empty_line = content_after_empty_line.replace("\n\n", "\n")
-print(content_after_empty_line)
-# 关闭浏览器
-driver.quit()
diff --git a/dsLightRag/WxGzh/Util/WxGzhUtil.py b/dsLightRag/WxGzh/Util/WxGzhUtil.py
new file mode 100644
index 00000000..6b6d55b1
--- /dev/null
+++ b/dsLightRag/WxGzh/Util/WxGzhUtil.py
@@ -0,0 +1,45 @@
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service as ChromeService
+from selenium.webdriver.common.by import By
+
+def get_article_content(url):
+    """
+    获取微信公众号文章内容
+    :param url: 文章URL
+    :return: 文章内容文本
+    """
+    options = Options()
+    options.add_argument('-headless')
+    service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
+    driver = webdriver.Chrome(service=service, options=options)
+    
+    try:
+        driver.get(url)
+        html_content = driver.find_element(By.CLASS_NAME, "rich_media").text
+        
+        # 处理内容，提取空行后的文本
+        lines = html_content.split('\n')
+        content_after_empty_line = ""
+        found_empty_line = False
+
+        for line in lines:
+            if not found_empty_line and line.strip() == "":
+                found_empty_line = True
+                continue
+
+            if found_empty_line:
+                content_after_empty_line += line + "\n"
+
+        if not found_empty_line:
+            content_after_empty_line = html_content
+
+        return content_after_empty_line.replace("\n\n", "\n")
+    finally:
+        driver.quit()
+
+if __name__ == '__main__':
+    # 示例用法
+    url = 'http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd'
+    content = get_article_content(url)
+    print(content)
diff --git a/dsLightRag/WxGzh/Util/__init__.py b/dsLightRag/WxGzh/Util/__init__.py
new file mode 100644
index 00000000..e69de29b

From 2ad3154fe87ddd18612f75a77ff603c68439edfd Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:09:16 +0800
Subject: [PATCH 21/46] 'commit'

---
 dsLightRag/Util/WxGzhUtil.py                  | 100 ++++++++++++++++++
 .../__pycache__/WxGzhUtil.cpython-310.pyc     | Bin 0 -> 2483 bytes
 dsLightRag/WxGzh/T2_GetArticleList.py         |  15 ++-
 dsLightRag/WxGzh/Util/WxGzhUtil.py            |  45 --------
 dsLightRag/WxGzh/Util/__init__.py             |   0
 dsLightRag/WxGzh/article_urls.txt             |  17 +--
 6 files changed, 122 insertions(+), 55 deletions(-)
 create mode 100644 dsLightRag/Util/WxGzhUtil.py
 create mode 100644 dsLightRag/Util/__pycache__/WxGzhUtil.cpython-310.pyc
 delete mode 100644 dsLightRag/WxGzh/Util/WxGzhUtil.py
 delete mode 100644 dsLightRag/WxGzh/Util/__init__.py

diff --git a/dsLightRag/Util/WxGzhUtil.py b/dsLightRag/Util/WxGzhUtil.py
new file mode 100644
index 00000000..d4bf9138
--- /dev/null
+++ b/dsLightRag/Util/WxGzhUtil.py
@@ -0,0 +1,100 @@
+import datetime
+import random
+import requests
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.chrome.service import Service as ChromeService
+from selenium.webdriver.common.by import By
+
+def init_wechat_browser():
+    """初始化微信爬虫浏览器实例"""
+    options = Options()
+    options.add_argument('-headless')
+    service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
+    return webdriver.Chrome(service=service, options=options)
+
+def get_wechat_articles(account_name, account_id, token, cookies, header):
+    """获取指定公众号的文章列表"""
+    article_urls = []
+    
+    # 搜索微信公众号的接口地址
+    search_url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
+    query_id = {
+        'action': 'search_biz',
+        'token': token,
+        'lang': 'zh_CN',
+        'f': 'json',
+        'ajax': '1',
+        'random': random.random(),
+        'query': account_name,
+        'begin': '0',
+        'count': '5'
+    }
+    
+    # 完整实现搜索和获取文章逻辑
+    search_response = requests.get(search_url, cookies=cookies, headers=header, params=query_id)
+    lists = search_response.json().get('list')[0]
+    fakeid = lists.get('fakeid')
+    
+    # 微信公众号文章接口
+    appmsg_url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
+    query_id_data = {
+        'token': token,
+        'lang': 'zh_CN',
+        'f': 'json',
+        'ajax': '1',
+        'random': random.random(),
+        'action': 'list_ex',
+        'begin': '0',
+        'count': '5',
+        'query': '',
+        'fakeid': fakeid,
+        'type': '9'
+    }
+    
+    query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data)
+    fakeid_list = query_fakeid_response.json().get('app_msg_list')
+    
+    for item in fakeid_list:
+        article_urls.append({
+            'title': item.get('title'),
+            'url': item.get('link'),
+            'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S')
+        })
+    
+    return article_urls
+
+def get_article_content(url):
+    """
+    获取微信公众号文章内容
+    :param url: 文章URL
+    :return: 文章内容文本
+    """
+    options = Options()
+    options.add_argument('-headless')
+    service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
+    driver = webdriver.Chrome(service=service, options=options)
+    
+    try:
+        driver.get(url)
+        html_content = driver.find_element(By.CLASS_NAME, "rich_media").text
+        
+        # 处理内容，提取空行后的文本
+        lines = html_content.split('\n')
+        content_after_empty_line = ""
+        found_empty_line = False
+
+        for line in lines:
+            if not found_empty_line and line.strip() == "":
+                found_empty_line = True
+                continue
+
+            if found_empty_line:
+                content_after_empty_line += line + "\n"
+
+        if not found_empty_line:
+            content_after_empty_line = html_content
+
+        return content_after_empty_line.replace("\n\n", "\n")
+    finally:
+        driver.quit()
\ No newline at end of file
diff --git a/dsLightRag/Util/__pycache__/WxGzhUtil.cpython-310.pyc b/dsLightRag/Util/__pycache__/WxGzhUtil.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b9742d49875cc692f0c4f8f3f838f7f92bca1af9
GIT binary patch
literal 2483
zcmai0>x&#k5TEXO?(E|(m&Ew?l$^Ng-KkN;MU8rDMDmcF5?oG(x#_;$x!ZYMPp|i2
z76e~`81pS+h_d@hL&AL%5hQ-{pXje<llaL`A_N4hdv;Gn(HXY7s=KDTy1IT<n^dc=
z29Nl$+y14ZX+KHfuMY;{1^8tI2+;`YXw70oO{8Sp#4zHH9vMvo$u+%WMpn~OywR~E
zr&&_G*>R(CvkbgN><_eNg*c=HW0kn13}cO;MXg@>8Ftc+*O4e~&}G8HcPRtGK9TY;
zi8IMAQub~bP+;*(TY*#`Khj+Tehcu+9s<I(_qCpuYkfq}03nTIf{A_>^$^!rjU43~
zG0tMx*6bbx_2KC+hF_f-esX?zb?4d-gBz!>-2D9V^>5DI{Oa`Z^RI_HUtYU<=2mI8
zO?}d#S$6B-LSyMv7?WfpTUy-8IE@ZHu@tmf64Ao*Il4(NAu$0%(B=MehkB{c+n2Dg
zv*HxOPDUp)nGP<9icg5=vsO2vF&EVZWo>k1CJ!jAVy=!ET$#qoKrU%)5{6rkz1&zL
z*{OuB!SDu4R%yV2I~umy{G{JndW(mhrBj<nw%d}MOSi<{Fb=u5L4&r>y=4X`p{xX^
zG6#r-UF4z~VpAXlCXn$XcayuTNDE+ETF)e?XAzu3go!~+uJyG6#9m%EaW0m6Hb<*C
zw{kr<a+{RS8a*d>`uafYW8w}FDc_;>N_h#S$^i9TQtg%hZj02Et%=+vlVs1ua<8H+
zmX*bdvRKWlBa2gDqn6iR*50|NH<6otBcA}vdk093`v!=ct7hNgwvzV`(EE@feTUoz
zo*cNS%WPRcmYa|px2xQ^`RTXAbLX#r^6_xz!tlc@*S`O3c<$R97e2Ut{^J{$`@_?p
z-5h+q{YabhG;7SyN9o)K4L8GhZhd_&NTT_m70xb)@q9*o7PObc?Zd(?cn<_&ZnwRK
z<HA_Yl31X}1$s=Nr|KnP`GL%PVe({+#=_|MaZ8{TVfd^5rm&a~$rlN8y-V4aFqdg7
z1l=I%#=NczJ4lkXkY>V`8B19vtkh?Il!2Wv;|#&Q32Vh)qaoRT=zm@J(=^IjhlMRa
z_UNY21bU`kW_N-ac9$e~OLC7S_eyeyB-4`IFUft9JP1S>d@H4p>1pbLMNis;6NbD)
zrDyS)D0frhbL#OhqT73B-kzO_W@pIs%xjIAV~v@`y3QVzcJwamh)UXB1`BPaTra7u
zg~KSsB;%RTTa>f?Qrm+4XiS77O-sw7x&ly@T*mz<6?zDWb09D)a;`oJ9tJ9wUhIRm
zJC;IHocRG|wTLa~G9Cs1aPTSHy<6#^qi)9wiI^DOIHOq##f%D59z839;ui6h-;3(_
z6Ayg##r{GpZpz#Jx;&at@iRgwKdK58WRG-iFC2ah?va0)gl_ga5Ca=J#uyc^H6DMP
zs!jpw|6CidjP!n;s@0H#4dlRkCvtx`EcObR3uHQtA6Y_V1yS{32N+d}QI7smiLt80
zI7hM)_w?=Oa=mZnn#3P)fX`Kjm;=08>KVDwcZmhKHi-?D#_8F)3HaU*b;QmZP)o`K
zjo53LH3(Fj_1WBJFG0O=h;=}FTI*SICOeWFJuA2R6=*xw0H2b(R0kT;7I(P~tzPa@
zaN3=QrvQG@5q5z;Lj6Y{4tKs&S`CG)X}G+`bg}r>$)m*zqrA)F@$^uA11i@qUQz4J
zf}NNxiHsPN9l7bUFlc+wMnYd8m&soC>YLlxb?Y_NCRkOf)+CWVflWyA1dwKB1sak^
zJ5=@$CTn)nT{!yU;-Yu_#bd7k^ynslElWG095@S8VKbU``~bkW-VHfGXkJ>|Z7#wp
znpv~b=26EBl9<cZ1i2g2ta<xL_WTu2nMb3PZvmoX+MHa0Is~rl&M0uxf6dV`ct*xl
z+B^(iSXR-2Fm$3x<mv{3yu*_~F*0xk;rAemQ3dbA>;$OSC)ktHcU68K2M*v2p@_q7
zRP@gK$0C&H(Z7qiqG8XC`f$;O|3h!oxS9N;p>Cjg_rDoUq9}>ymbcUus0OM~qTKvT
i2qYSr!m9IZl#p(RJ}+^U?E~Uy5@MCn>tM&O9Qp%Tx!MB&

literal 0
HcmV?d00001

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index a9b9ae09..8aff8387 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -14,6 +14,7 @@ import re
 import requests
 import asyncio
 from Util.PostgreSQLUtil import init_postgres_pool
+from Util.WxGzhUtil import init_wechat_browser, get_wechat_articles
 
 async def get_wechat_sources():
     """从t_wechat_source表获取微信公众号列表"""
@@ -78,7 +79,9 @@ if __name__ == '__main__':
     }
 
     service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
-    driver = webdriver.Chrome(service=service, options=options)
+    driver = webdriver.Chrome(service=service, options=options)  # 删除这行
+    # 使用统一的初始化方式
+    driver = init_wechat_browser()
 
     # 方法3：使用requests库发送请求获取重定向URL
     url = 'https://mp.weixin.qq.com'
@@ -93,15 +96,20 @@ if __name__ == '__main__':
             logging.info("微信token:" + token)
 
     article_urls = []
-    # 替换硬编码的gzlist
+    # 初始化浏览器
+    driver = init_wechat_browser()
+    
+    # 获取公众号列表
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
     try:
         gzlist = loop.run_until_complete(get_wechat_sources())
     finally:
         loop.close()
-
+    
+    # 爬取文章
     for item in gzlist:
+        article_urls = get_wechat_articles(item["account_name"], item["account_id"], token, cookies, header)
         account_name = item["account_name"]
         account_id = item["account_id"]
         # 搜索微信公众号的接口地址
@@ -171,4 +179,3 @@ if __name__ == '__main__':
 
         # 关闭浏览器
     driver.quit()
-    print("所有文章爬取完成！")
diff --git a/dsLightRag/WxGzh/Util/WxGzhUtil.py b/dsLightRag/WxGzh/Util/WxGzhUtil.py
deleted file mode 100644
index 6b6d55b1..00000000
--- a/dsLightRag/WxGzh/Util/WxGzhUtil.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from selenium import webdriver
-from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.chrome.service import Service as ChromeService
-from selenium.webdriver.common.by import By
-
-def get_article_content(url):
-    """
-    获取微信公众号文章内容
-    :param url: 文章URL
-    :return: 文章内容文本
-    """
-    options = Options()
-    options.add_argument('-headless')
-    service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
-    driver = webdriver.Chrome(service=service, options=options)
-    
-    try:
-        driver.get(url)
-        html_content = driver.find_element(By.CLASS_NAME, "rich_media").text
-        
-        # 处理内容，提取空行后的文本
-        lines = html_content.split('\n')
-        content_after_empty_line = ""
-        found_empty_line = False
-
-        for line in lines:
-            if not found_empty_line and line.strip() == "":
-                found_empty_line = True
-                continue
-
-            if found_empty_line:
-                content_after_empty_line += line + "\n"
-
-        if not found_empty_line:
-            content_after_empty_line = html_content
-
-        return content_after_empty_line.replace("\n\n", "\n")
-    finally:
-        driver.quit()
-
-if __name__ == '__main__':
-    # 示例用法
-    url = 'http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd'
-    content = get_article_content(url)
-    print(content)
diff --git a/dsLightRag/WxGzh/Util/__init__.py b/dsLightRag/WxGzh/Util/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/dsLightRag/WxGzh/article_urls.txt b/dsLightRag/WxGzh/article_urls.txt
index 7e5eccc0..dc2077eb 100644
--- a/dsLightRag/WxGzh/article_urls.txt
+++ b/dsLightRag/WxGzh/article_urls.txt
@@ -1,9 +1,14 @@
-明日（16日）公布高中一批次录取结果，查询通道在此，请收好！ 2025-07-15 08:38:04 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546664&idx=1&sn=431c1f89b968ddff2165466ce20b2976&chksm=feb6a485c9c12d930da2340a813d24d5dd168688af162b4e7bdcd42d5c31d832ef9dc915b1dc#rd
-长春2024-2025九上试卷合集（赠答案） 2025-07-15 08:38:04 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546664&idx=2&sn=ffd3261777f03cdfcd78e7a899f25778&chksm=feb6a485c9c12d93a16db0ce8d9850cc843b22442b5d49850f93c99efbbc160855ff1a2314cf#rd
-网传各初中2025中考最高分准吗？ 2025-07-13 09:22:08 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546662&idx=1&sn=5c8a0cc82f0aab69a600d06b6e63a57f&chksm=feb6a48bc9c12d9da8ed3b2a19d12fa275f83796201448996bdfa2204bf41f7d826430ae30be#rd
-长春市2025年中考各批次录取最低控制线确定 2025-07-12 10:05:49 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546644&idx=1&sn=ea3371033b95e7203e881947c980a8a4&chksm=feb6a4b9c9c12daf64cbd87239cd2fdc22a93e6e0d2555ce3c5b66bc0e96028bfc3565021201#rd
-长春市2025年中考成绩将于7月12日12时公布 2025-07-11 15:13:54 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546642&idx=1&sn=718e0fa8463273260dae093e0686b7e0&chksm=feb6a4bfc9c12da9ab59ff8d7da3caefbb44d8195bb694b31cd5b3cab8fc6505b20c61a0ae62#rd
-长春2024-2025九上试卷合集（赠答案） 2025-07-09 10:56:48 http://mp.weixin.qq.com/s?__biz=MzU5OTQ0MzEzOA==&mid=2247546544&idx=1&sn=d07bf1b38403c0578ad67ae007ce6159&chksm=feb6a51dc9c12c0b9b90a2131a9ba913b92ed2eab3dcaa78fadccb6b231e4f5cb4247750f910#rd
+长春中考上演“神仙打架”！省二力旺等五校过半考生超700分！ 2025-07-14 18:36:34 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd
+独家专访赫行学校2025年中考“双黄蛋”！学霸靠啥杀出重围？ 2025-07-14 18:36:34 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=2&sn=c7733f7c2c6331e51e55af695f99a43e&chksm=84e1ab0cb396221a7d185dcb99acc9dce45cc5c66c3eef42680a215b710bb9bfa9fd10da4419#rd
+长春40所学校中考成绩曝光！700+成批涌现！谁是最大黑马？ 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=1&sn=1f314640ae6eec236b0e16271bd44362&chksm=84e1ab1db396220b73ae08898a026d887436501a6c42abe01d7fa4aef9063533fad89720d3b8#rd
+喜报！长春外国语学校女子篮球队夺得冠军！ 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=2&sn=31651043acb6ecbf4232e92e635196b6&chksm=84e1ab1db396220b0810c3bdf332128b110d1902658f2556eaeff67cec084a8a068a5ae9a275#rd
+“趣闯盛夏·探无界”！探秘一实验银河小学夏令营 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=3&sn=8edf6ce8cebdaad55343b39639876c27&chksm=84e1ab1db396220b26b172b3b565f919f7ded4c2a5b78227294ea29a558a7666c33b8c1de660#rd
+刚刚！2025年长春中考各批次控制线公布！ 2025-07-12 10:04:32 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526196&idx=1&sn=282e5e824410a9a92a83dd800cb58a7c&chksm=84e1aba6b39622b03fe6422032474c9696f83541d9ff9b8b6a9f0f099ce459da430f720d05e4#rd
+重磅消息！师大附属实验学校（经开）校长有新任命！ 2025-07-12 10:04:32 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526196&idx=2&sn=9449c87935faf86ddcc5a674ea888913&chksm=84e1aba6b39622b03fd8413ff1e74b61f662ec8deb3887c2c5b8e5ad15470b15ae14b21e94ea#rd
+市教育局最新发布！长春2025年中考成绩将于7月12日公布！ 2025-07-11 15:22:13 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526176&idx=1&sn=a5b4104d2fe74ace32ab31faf5f1c44c&chksm=84e1abb2b39622a40b0e0969e84fb00c753cc8ffefb8624726afa2a7352ea725c7f967bf25f5#rd
+长春市第十九中学2025年职称评聘拟通过人员名单的公示！有你认识的老师吗？ 2025-07-11 15:22:13 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526176&idx=2&sn=693d13964e4be18718c0eb4fd13ba68f&chksm=84e1abb2b39622a442a9f7cea8ddc72820050b2896968f2d0ae283c7caca2dbe014a721feb2e#rd
+高分喜报频传！长春这所小学靠啥成为“学霸制造机”？ 2025-07-10 19:00:00 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526158&idx=1&sn=a0af7f484d6a3300a9b7f3d787a2594d&chksm=84e1ab9cb396228a56420696eb09071ff829d58e8e31bd652f849f3cbd0ee276b0baad7a1e89#rd
+蝉联冠军！吉大尚德游泳队斩获骄人成绩！ 2025-07-10 19:00:00 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526158&idx=2&sn=cabb682e99978bf2a58ff0e9e06dc53d&chksm=84e1ab9cb396228a5cd457cd7ee0728491e6b3dc34fbde02240624364cfa8a9e2c533052d2b4#rd
 长春中考上演“神仙打架”！省二力旺等五校过半考生超700分！ 2025-07-14 18:36:34 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd
 独家专访赫行学校2025年中考“双黄蛋”！学霸靠啥杀出重围？ 2025-07-14 18:36:34 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=2&sn=c7733f7c2c6331e51e55af695f99a43e&chksm=84e1ab0cb396221a7d185dcb99acc9dce45cc5c66c3eef42680a215b710bb9bfa9fd10da4419#rd
 长春40所学校中考成绩曝光！700+成批涌现！谁是最大黑马？ 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=1&sn=1f314640ae6eec236b0e16271bd44362&chksm=84e1ab1db396220b73ae08898a026d887436501a6c42abe01d7fa4aef9063533fad89720d3b8#rd

From 24de098979caeea9e6c0add92c234cbbc2126229 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:15:40 +0800
Subject: [PATCH 22/46] 'commit'

---
 dsLightRag/Util/WxGzhUtil.py                  |  53 ------------------
 .../__pycache__/WxGzhUtil.cpython-310.pyc     | Bin 2483 -> 1364 bytes
 dsLightRag/WxGzh/T2_GetArticleList.py         |  37 +++++-------
 3 files changed, 13 insertions(+), 77 deletions(-)

diff --git a/dsLightRag/Util/WxGzhUtil.py b/dsLightRag/Util/WxGzhUtil.py
index d4bf9138..07abec69 100644
--- a/dsLightRag/Util/WxGzhUtil.py
+++ b/dsLightRag/Util/WxGzhUtil.py
@@ -1,6 +1,3 @@
-import datetime
-import random
-import requests
 from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service as ChromeService
@@ -13,56 +10,6 @@ def init_wechat_browser():
     service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
     return webdriver.Chrome(service=service, options=options)
 
-def get_wechat_articles(account_name, account_id, token, cookies, header):
-    """获取指定公众号的文章列表"""
-    article_urls = []
-    
-    # 搜索微信公众号的接口地址
-    search_url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
-    query_id = {
-        'action': 'search_biz',
-        'token': token,
-        'lang': 'zh_CN',
-        'f': 'json',
-        'ajax': '1',
-        'random': random.random(),
-        'query': account_name,
-        'begin': '0',
-        'count': '5'
-    }
-    
-    # 完整实现搜索和获取文章逻辑
-    search_response = requests.get(search_url, cookies=cookies, headers=header, params=query_id)
-    lists = search_response.json().get('list')[0]
-    fakeid = lists.get('fakeid')
-    
-    # 微信公众号文章接口
-    appmsg_url = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
-    query_id_data = {
-        'token': token,
-        'lang': 'zh_CN',
-        'f': 'json',
-        'ajax': '1',
-        'random': random.random(),
-        'action': 'list_ex',
-        'begin': '0',
-        'count': '5',
-        'query': '',
-        'fakeid': fakeid,
-        'type': '9'
-    }
-    
-    query_fakeid_response = requests.get(appmsg_url, cookies=cookies, headers=header, params=query_id_data)
-    fakeid_list = query_fakeid_response.json().get('app_msg_list')
-    
-    for item in fakeid_list:
-        article_urls.append({
-            'title': item.get('title'),
-            'url': item.get('link'),
-            'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S')
-        })
-    
-    return article_urls
 
 def get_article_content(url):
     """
diff --git a/dsLightRag/Util/__pycache__/WxGzhUtil.cpython-310.pyc b/dsLightRag/Util/__pycache__/WxGzhUtil.cpython-310.pyc
index b9742d49875cc692f0c4f8f3f838f7f92bca1af9..5075ff6ce5e093e8119550083361d00de35b186a 100644
GIT binary patch
delta 198
zcmdlie1(fIpO=@50SNXUFU_!Jo5&~Q?*indFr+Z%Fyt~uF*1VKOgT)s%u&o>HggV3
zE^8DUn9Y*Ip34!%0c5kLu(dEmai*}RZ~$qp6wY7<O|F+9gE!8qWfW!uGKx8X1RE0<
zBNrndW0BBiFSa;F##@uuuq(0NVopyjnS7bu1;{Eb%9$*~QOn3Tc@~E;yF5^jpC<R@
bM;yJ3@{>zB)dYBeB0LN{j3CIt#KQpqwXP?V

delta 1323
zcmaJ=L66%+6!zF-$99}#n>Nsvwv>ugjbJw{g#&x3(6$mnYI~@JL?o~r&urq|*zt}h
z>^8EM14ZqDvY?e(soFz)Ljob89(v)*Kj47oOxkkdgg7Db#z~5};E})g-preCzW2ud
z#eRIeI;>P|1s?J9sQ-uaP4&!#KtvRR21;jcQ3n;V+EKx(11-cIjO3a&&_knR6h0o5
zLbGEPzCN(Sa;FTwK}vU&jzdghfvpgml)+XBx}~(7-{rwKT1Z%Xv`bj<8D%Z><Glz?
zELccP07JErjjD*%A@Fs8<+B2g1Meohi2&#+nNCn<5H(eaMljL2GF5UVEmRQm$jV9_
zZL6t~YAH@j#JZ0&Gc~7Lu1r;8=ZKU~DVdd8(}_x)9A!4CWaa;ok*X|-)KZ(QkYh*X
z%qb|#1*KC^R?^CXvJM>8w0c$f<ZM<;^(jtkaM1A_74}4qxW27V4PGkZMvgv5IhvZ}
zr3q+x`AEwVQg*MWI`rz~=yfRZ`BzV72M?Zpd3W~1quCeV|Mt_v*};=%kM2BuaQE4F
z)7ia;&+~7`Z}d5jlZ|#ejMw&PupdNgySr=tFl_t1V6_`W?Sy*F?{|ao6=BbP7XqP=
z`|joqfwz;%FcN58ptl5iw`B>#^LYSMIv?)PNZ^4N^#s}y*xUB@g~7at3`3#sjwrh=
z^e*iMkn0ae5pQXt<PV2C0Zl}yPd!3eB8=E$UYGz+kZ^{eE@5nWJ2W8UOLOg${}J!S
zahUY3h?3mx(tV)_^j@pXP6H2nMZy^gXC<7Ia7w~?39m_bRl-F8f%)y23I_;XV00xP
zlo;><ok(rbjwp{};&JNoAf)4C&5u``;cAndZ+@`RyuQ)A)za9Tl1Lk|fpFqc7byBg
za?2{t7AB+6m4qik>ru`cGTVRyX+(r6sU@?hY_VY|y@Y#VEVN(}abZGlw&Y}M1vL6a
zJKe~GbbA>EWM1`!f_C0s$Ynh62h;^o$&34y3eDnz+<=JM;?@~W;$f6fq08cvd0T8R
zeqXj&5{Sxjj|*x&(U^zx+j3uQFE1vF7UF=@@NgYB$R73k9(Nal68Xm_9|B+%YpSZE
z`7oCDuX?2pKl{IH%aujmpY>`LnJPvmjMK>e8yoD}V@v%MVRlJT4*S{#oxKhGhX>kK
zowcR+tCpe7!;kRg#!U!YHTDje`FF7{*T2U1jcao1=HWU0;>i=>nu_Fi7Q<A{l5_bV
DXP18g

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index 8aff8387..153e43a6 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -5,6 +5,7 @@
 # https://blog.csdn.net/yajuanpi4899/article/details/121584268
 
 
+import asyncio
 import datetime
 import json
 import logging
@@ -12,9 +13,10 @@ import random
 import re
 
 import requests
-import asyncio
+
 from Util.PostgreSQLUtil import init_postgres_pool
-from Util.WxGzhUtil import init_wechat_browser, get_wechat_articles
+from Util.WxGzhUtil import init_wechat_browser, get_article_content
+
 
 async def get_wechat_sources():
     """从t_wechat_source表获取微信公众号列表"""
@@ -109,7 +111,6 @@ if __name__ == '__main__':
     
     # 爬取文章
     for item in gzlist:
-        article_urls = get_wechat_articles(item["account_name"], item["account_id"], token, cookies, header)
         account_name = item["account_name"]
         account_id = item["account_id"]
         # 搜索微信公众号的接口地址
@@ -155,27 +156,15 @@ if __name__ == '__main__':
 
         for item in fakeid_list:
             # 采集item示例
-            new_article = {
-                'title': item.get('title'),
-                'article_url': item.get('link'),
-                'account_id': account_id,
-                'account_name': account_name,
-                'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime(
-                    '%Y-%m-%d %H:%M:%S'),
-                'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
-            }
-            logging.info("new_article:", new_article)
-            article_urls.append({"title": item.get('title'), "url": item.get('link'),
-                                 "publish_time": datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime(
-                                     '%Y-%m-%d %H:%M:%S')})
+            article_url = item.get('link')
+            article_title = item.get('title')
+            publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S')
+            
+            # 直接获取并显示文章内容
+            print(f"正在处理文章: {article_title} ({publish_time})")
+            content = get_article_content(article_url)
+            print(f"文章内容预览: {content[:200]}...")
+            
             time.sleep(1)
-
-    for x in article_urls:
-        print(x)
-    # 将返回的地址写入到文件
-    with open('article_urls.txt', 'w', encoding='utf-8') as f:
-        for record in article_urls:
-            f.write(record['title'] + " " + record['publish_time'] + " " + record['url'] + '\n')
-
         # 关闭浏览器
     driver.quit()

From af62427756999c0765c809e370889e1cf8a8353d Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:16:53 +0800
Subject: [PATCH 23/46] 'commit'

---
 dsLightRag/WxGzh/T2_GetArticleList.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index 153e43a6..65af9f41 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -27,6 +27,8 @@ async def get_wechat_sources():
             return [dict(row) for row in rows]
     finally:
         await pool.close()
+
+
 """
 # 查看selenium版本
 pip show selenium
@@ -100,7 +102,7 @@ if __name__ == '__main__':
     article_urls = []
     # 初始化浏览器
     driver = init_wechat_browser()
-    
+
     # 获取公众号列表
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
@@ -108,7 +110,7 @@ if __name__ == '__main__':
         gzlist = loop.run_until_complete(get_wechat_sources())
     finally:
         loop.close()
-    
+
     # 爬取文章
     for item in gzlist:
         account_name = item["account_name"]
@@ -159,12 +161,14 @@ if __name__ == '__main__':
             article_url = item.get('link')
             article_title = item.get('title')
             publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S')
-            
+
             # 直接获取并显示文章内容
+            if '试卷' in article_title: # 过滤掉试卷
+                continue
             print(f"正在处理文章: {article_title} ({publish_time})")
             content = get_article_content(article_url)
             print(f"文章内容预览: {content[:200]}...")
-            
+
             time.sleep(1)
         # 关闭浏览器
     driver.quit()

From 7101b19cb7fea5bbf75ce13dc95880661ec4ccb3 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:28:01 +0800
Subject: [PATCH 24/46] 'commit'

---
 dsLightRag/WxGzh/T2_GetArticleList.py | 33 +++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 5 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index 65af9f41..0bb8944c 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -47,6 +47,20 @@ from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service as ChromeService
 
+async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, account_id):
+    try:
+        async with pool.acquire() as conn:
+            # 确保account_id是整数
+            account_id_int = int(account_id) if account_id else 0
+            await conn.execute('''
+                INSERT INTO t_wechat_articles 
+                (title, source, url, publish_time, content, source_id)
+                VALUES ($1, $2, $3, $4, $5, $6)
+            ''', article_title, account_name, article_url,
+               publish_time, content, account_id_int)  # 修改为整数类型
+    except Exception as e:
+        logging.error(f"保存文章失败: {e}")
+
 if __name__ == '__main__':
     # 从文件cookies.txt中获取
     with open('cookies.txt', 'r', encoding='utf-8') as f:
@@ -157,17 +171,26 @@ if __name__ == '__main__':
         fakeid_list = query_fakeid_response.json().get('app_msg_list')
 
         for item in fakeid_list:
-            # 采集item示例
             article_url = item.get('link')
             article_title = item.get('title')
-            publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S')
+            publish_time = datetime.datetime.fromtimestamp(int(item.get("update_time")))
 
-            # 直接获取并显示文章内容
-            if '试卷' in article_title: # 过滤掉试卷
+            if '试卷' in article_title:  # 过滤掉试卷
                 continue
+
             print(f"正在处理文章: {article_title} ({publish_time})")
             content = get_article_content(article_url)
-            print(f"文章内容预览: {content[:200]}...")
+
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            try:
+                pool = loop.run_until_complete(init_postgres_pool())
+                loop.run_until_complete(
+                    save_article_to_db(pool, article_title, account_name, article_url, publish_time, content,
+                                       account_id))
+            finally:
+                loop.run_until_complete(pool.close())
+                loop.close()
 
             time.sleep(1)
         # 关闭浏览器

From ed4aa1bd69a80592b284258656a03bfb4b0b5946 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:33:27 +0800
Subject: [PATCH 25/46] 'commit'

---
 dsLightRag/WxGzh/T2_GetArticleList.py | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index 0bb8944c..207f5bbd 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -23,7 +23,7 @@ async def get_wechat_sources():
     try:
         pool = await init_postgres_pool()
         async with pool.acquire() as conn:
-            rows = await conn.fetch('SELECT account_id, account_name FROM t_wechat_source')
+            rows = await conn.fetch('SELECT * FROM t_wechat_source')
             return [dict(row) for row in rows]
     finally:
         await pool.close()
@@ -43,24 +43,24 @@ https://googlechromelabs.github.io/chrome-for-testing/
 https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
 """
 import time
-from selenium import webdriver
 from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service as ChromeService
 
-async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, account_id):
+
+async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, id):
     try:
         async with pool.acquire() as conn:
-            # 确保account_id是整数
-            account_id_int = int(account_id) if account_id else 0
+            # 更安全的account_id转换逻辑
             await conn.execute('''
-                INSERT INTO t_wechat_articles 
-                (title, source, url, publish_time, content, source_id)
-                VALUES ($1, $2, $3, $4, $5, $6)
-            ''', article_title, account_name, article_url,
-               publish_time, content, account_id_int)  # 修改为整数类型
+                               INSERT INTO t_wechat_articles
+                                   (title, source, url, publish_time, content, source_id)
+                               VALUES ($1, $2, $3, $4, $5, $6)
+                               ''', article_title, account_name, article_url,
+                               publish_time, content, id)
     except Exception as e:
         logging.error(f"保存文章失败: {e}")
 
+
 if __name__ == '__main__':
     # 从文件cookies.txt中获取
     with open('cookies.txt', 'r', encoding='utf-8') as f:
@@ -97,7 +97,6 @@ if __name__ == '__main__':
     }
 
     service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
-    driver = webdriver.Chrome(service=service, options=options)  # 删除这行
     # 使用统一的初始化方式
     driver = init_wechat_browser()
 
@@ -114,8 +113,6 @@ if __name__ == '__main__':
             logging.info("微信token:" + token)
 
     article_urls = []
-    # 初始化浏览器
-    driver = init_wechat_browser()
 
     # 获取公众号列表
     loop = asyncio.new_event_loop()
@@ -129,6 +126,7 @@ if __name__ == '__main__':
     for item in gzlist:
         account_name = item["account_name"]
         account_id = item["account_id"]
+        id = item["id"]
         # 搜索微信公众号的接口地址
         search_url = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
         # 搜索微信公众号接口需要传入的参数，有三个变量：微信公众号token、随机数random、搜索的微信公众号名字
@@ -187,7 +185,7 @@ if __name__ == '__main__':
                 pool = loop.run_until_complete(init_postgres_pool())
                 loop.run_until_complete(
                     save_article_to_db(pool, article_title, account_name, article_url, publish_time, content,
-                                       account_id))
+                                       id))
             finally:
                 loop.run_until_complete(pool.close())
                 loop.close()

From 0686c3332cada42ae652a5cf36677c3a1d5aa41b Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:36:28 +0800
Subject: [PATCH 26/46] 'commit'

---
 dsLightRag/WxGzh/T2_GetArticleList.py | 30 +++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index 207f5bbd..99ec2cfa 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -47,16 +47,34 @@ from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.chrome.service import Service as ChromeService
 
 
+async def is_article_exist(pool, article_url):
+    """Return True if a row with this url already exists in t_wechat_articles, else False."""
+    try:
+        async with pool.acquire() as conn:
+            row = await conn.fetchrow('''
+                SELECT 1 FROM t_wechat_articles 
+                WHERE url = $1 LIMIT 1
+            ''', article_url)
+            return row is not None
+    except Exception as e:
+        logging.error(f"检查文章存在性失败: {e}")
+        return False  # On error, default to False ("not present") so the normal flow is not interrupted
+
+
 async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, id):
+    # 先检查文章是否已存在
+    if await is_article_exist(pool, article_url):
+        logging.info(f"文章已存在，跳过保存: {article_url}")
+        return
+        
     try:
         async with pool.acquire() as conn:
-            # 更安全的account_id转换逻辑
             await conn.execute('''
-                               INSERT INTO t_wechat_articles
-                                   (title, source, url, publish_time, content, source_id)
-                               VALUES ($1, $2, $3, $4, $5, $6)
-                               ''', article_title, account_name, article_url,
-                               publish_time, content, id)
+                INSERT INTO t_wechat_articles
+                    (title, source, url, publish_time, content, source_id)
+                VALUES ($1, $2, $3, $4, $5, $6)
+            ''', article_title, account_name, article_url,
+               publish_time, content, id)
     except Exception as e:
         logging.error(f"保存文章失败: {e}")
 

From a4c4bd7475d72ce432811bc5f526ab8528a96fa4 Mon Sep 17 00:00:00 2001
From: "Kalman.CHENG" <123204464@qq.com>
Date: Tue, 15 Jul 2025 11:37:29 +0800
Subject: [PATCH 27/46] =?UTF-8?q?=E6=95=99=E8=82=B2=E5=9E=82=E7=9B=B4?=
 =?UTF-8?q?=E9=A2=86=E5=9F=9F=E5=A4=A7=E6=A8=A1=E5=9E=8B=E5=B9=B3=E5=8F=B0?=
 =?UTF-8?q?=20modify=20by=20Kalman.CHENG=20=E2=98=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../api/controller/UserController.py          | 20 ++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/dsAiTeachingModel/api/controller/UserController.py b/dsAiTeachingModel/api/controller/UserController.py
index e23d8f5f..bff5dd85 100644
--- a/dsAiTeachingModel/api/controller/UserController.py
+++ b/dsAiTeachingModel/api/controller/UserController.py
@@ -3,6 +3,7 @@ import re
 
 from fastapi import APIRouter, Request, Response, Depends
 from auth.dependencies import *
+from utils.CommonUtil import md5_encrypt
 from utils.Database import *
 from utils.ParseRequest import *
 
@@ -29,4 +30,21 @@ async def modify_telephone(request: Request):
 
 
 # 【Base-User-2】维护用户密码
-# @router.post("/modifyPassword")
+@router.post("/modifyPassword")
+async def modify_password(request: Request):
+    person_id = await get_request_str_param(request, "person_id", True, True)
+    old_password = await get_request_str_param(request, "old_password", True, True)
+    password = await get_request_str_param(request, "password", True, True)
+    # Check that the old password is correct. NOTE(review): both SQL strings here are concatenated from request params (injection risk) and the raw password is written to original_pwd - confirm and switch to bound parameters.
+    select_password_sql: str = "select pwdmd5 from t_sys_loginperson where person_id = '" + person_id + "' and b_use = 1"
+    userlist = await find_by_sql(select_password_sql, ())
+    if len(userlist) == 0:
+        return {"success": False, "message": "用户不存在"}
+    else:
+        if userlist[0]["pwdmd5"] != md5_encrypt(old_password):
+            return {"success": False, "message": "旧密码错误"}
+        else:
+            update_password_sql: str = "update t_sys_loginperson set original_pwd = '" + password + "',pwdmd5 = '" + md5_encrypt(password) + "' where person_id = '" + person_id + "'"
+            await execute_sql(update_password_sql)
+            return {"success": True, "message": "修改成功"}
+

From e25942c28f11c7314a73c6ddc511950c9ec28668 Mon Sep 17 00:00:00 2001
From: "Kalman.CHENG" <123204464@qq.com>
Date: Tue, 15 Jul 2025 11:47:24 +0800
Subject: [PATCH 28/46] =?UTF-8?q?=E6=95=99=E8=82=B2=E5=9E=82=E7=9B=B4?=
 =?UTF-8?q?=E9=A2=86=E5=9F=9F=E5=A4=A7=E6=A8=A1=E5=9E=8B=E5=B9=B3=E5=8F=B0?=
 =?UTF-8?q?=20modify=20by=20Kalman.CHENG=20=E2=98=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 dsAiTeachingModel/api/controller/UserController.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dsAiTeachingModel/api/controller/UserController.py b/dsAiTeachingModel/api/controller/UserController.py
index bff5dd85..2b1d1a3f 100644
--- a/dsAiTeachingModel/api/controller/UserController.py
+++ b/dsAiTeachingModel/api/controller/UserController.py
@@ -21,7 +21,7 @@ async def modify_telephone(request: Request):
     # 校验手机号码是否已被注册
     select_telephone_sql: str = "select * from t_sys_loginperson where b_use = 1 and telephone = '" + telephone + "' and person_id <> '" + person_id + "'"
     userlist = await find_by_sql(select_telephone_sql, ())
-    if len(userlist) > 0:
+    if userlist is not None:
         return {"success": False, "message": "手机号码已被注册"}
     else:
         update_telephone_sql: str = "update t_sys_loginperson set telephone = '" + telephone + "' where person_id = '" + person_id + "'"

From 870e3540a0150410d04e7fabb85d2226222e629c Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:47:47 +0800
Subject: [PATCH 29/46] 'commit'

---
 dsLightRag/WxGzh/T2_GetArticleList.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index 99ec2cfa..43845916 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -18,6 +18,19 @@ from Util.PostgreSQLUtil import init_postgres_pool
 from Util.WxGzhUtil import init_wechat_browser, get_article_content
 
 
+# 在程序开始时添加以下配置
+logging.basicConfig(
+    level=logging.INFO,  # 设置日志级别为INFO
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+
+# 或者如果你想更详细地控制日志输出
+logger = logging.getLogger('WeiXinGongZhongHao')
+logger.setLevel(logging.INFO)
+handler = logging.StreamHandler()
+handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+logger.addHandler(handler)
+
 async def get_wechat_sources():
     """从t_wechat_source表获取微信公众号列表"""
     try:

From 933b88853b529a6d6a3e6661889be700eec5877e Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:48:43 +0800
Subject: [PATCH 30/46] 'commit'

---
 dsLightRag/WxGzh/T2_GetArticleList.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index 43845916..001c2ec3 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -105,12 +105,12 @@ if __name__ == '__main__':
         # 换算出过期时间
         expiry_time = time.localtime(expiry)
         expiry_date = time.strftime("%Y-%m-%d %H:%M:%S", expiry_time)
-        print("cookies的过期时间一般是4天，cookies过期时间：", expiry_date)
+        logger.info("cookies的过期时间一般是4天，cookies过期时间：", expiry_date)
         # 获取当前时间戳
         current_timestamp = time.time()
         # 检查是否已过期
         if current_timestamp > expiry:
-            print("Cookie已过期")
+            logger.error("Cookie已过期")
             exit()
     # 移除expiry属性
     del cookies["expiry"]
@@ -136,12 +136,11 @@ if __name__ == '__main__':
     response = requests.get(url=url, allow_redirects=False, cookies=cookies)
     if 'Location' in response.headers:
         redirect_url = response.headers.get("Location")
-        print("重定向URL:", redirect_url)
+        logger.info("重定向URL:", redirect_url)
         token_match = re.findall(r'token=(\d+)', redirect_url)
         if token_match:
             token = token_match[0]
-            print("获取到的token:", token)
-            logging.info("微信token:" + token)
+            logger.info("获取到的token:", token)
 
     article_urls = []
 
@@ -207,7 +206,7 @@ if __name__ == '__main__':
             if '试卷' in article_title:  # 过滤掉试卷
                 continue
 
-            print(f"正在处理文章: {article_title} ({publish_time})")
+            logger.info(f"正在处理文章: {article_title} ({publish_time})")
             content = get_article_content(article_url)
 
             loop = asyncio.new_event_loop()

From d41d7c19a47462d053adf49147aeb0b487cf16ad Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:50:05 +0800
Subject: [PATCH 31/46] 'commit'

---
 dsLightRag/WxGzh/T2_GetArticleList.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index d958d01d..187f3dbd 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -106,7 +106,7 @@ if __name__ == '__main__':
         # 换算出过期时间
         expiry_time = time.localtime(expiry)
         expiry_date = time.strftime("%Y-%m-%d %H:%M:%S", expiry_time)
-        logger.info(f"cookies的过期时间一般是4天，cookies过期时间：%s" % expiry_date)
+
         # 获取当前时间戳
         current_timestamp = time.time()
         # 检查是否已过期
@@ -115,7 +115,7 @@ if __name__ == '__main__':
             exit()
     # 移除expiry属性
     del cookies["expiry"]
-
+    logger.info(f"cookies的过期时间一般是4天，cookies过期时间：%s" % expiry_date)
     options = Options()
     options.add_argument('-headless')  # 无头参数，调试时可以注释掉
     # 设置headers - 使用微信内置浏览器的User-Agent

From b7334a0d07d323c23bdc4978eb5cdeb7f5cbe327 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:51:20 +0800
Subject: [PATCH 32/46] 'commit'

---
 dsLightRag/WxGzh/T2_GetArticleList.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index 187f3dbd..470abf3b 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -137,11 +137,11 @@ if __name__ == '__main__':
     response = requests.get(url=url, allow_redirects=False, cookies=cookies)
     if 'Location' in response.headers:
         redirect_url = response.headers.get("Location")
-        logger.info("重定向URL:", redirect_url)
+        logger.info(f"重定向URL:%s"%redirect_url)
         token_match = re.findall(r'token=(\d+)', redirect_url)
         if token_match:
             token = token_match[0]
-            logger.info("获取到的token:", token)
+            logger.info(f"获取到的token:%s"%token)
 
     article_urls = []
 

From 7f3ea89e3c9bc08549d9b4d0a6b252191a831bab Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:53:03 +0800
Subject: [PATCH 33/46] 'commit'

---
 dsLightRag/WxGzh/T2_GetArticleList.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index 470abf3b..a287c2f6 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -17,18 +17,15 @@ import requests
 from Util.PostgreSQLUtil import init_postgres_pool
 from Util.WxGzhUtil import init_wechat_browser, get_article_content
 
-# 在程序开始时添加以下配置
-logging.basicConfig(
-    level=logging.INFO,  # 设置日志级别为INFO
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-
-# 或者如果你想更详细地控制日志输出
+# 删除重复的日志配置，只保留以下内容
 logger = logging.getLogger('WeiXinGongZhongHao')
 logger.setLevel(logging.INFO)
-handler = logging.StreamHandler()
-handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
-logger.addHandler(handler)
+
+# 确保只添加一个handler
+if not logger.handlers:
+    handler = logging.StreamHandler()
+    handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+    logger.addHandler(handler)
 
 
 async def get_wechat_sources():

From 5c10a56d315b4bde75df5cbe1a203958accd9f2d Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:53:55 +0800
Subject: [PATCH 34/46] 'commit'

---
 dsLightRag/WxGzh/T2_GetArticleList.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_GetArticleList.py
index a287c2f6..a6e073bb 100644
--- a/dsLightRag/WxGzh/T2_GetArticleList.py
+++ b/dsLightRag/WxGzh/T2_GetArticleList.py
@@ -75,7 +75,7 @@ async def is_article_exist(pool, article_url):
 async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, id):
     # 先检查文章是否已存在
     if await is_article_exist(pool, article_url):
-        logging.info(f"文章已存在，跳过保存: {article_url}")
+        logger.info(f"文章已存在，跳过保存: {article_url}")
         return
 
     try:

From 2785f4afb740ee2e73521343ad7ddbb7760a60cd Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:55:40 +0800
Subject: [PATCH 35/46] 'commit'

---
 dsLightRag/WxGzh/{T1_Login.py => T1_LoginGetCookie.py}          | 0
 dsLightRag/WxGzh/{T2_GetArticleList.py => T2_CollectArticle.py} | 0
 2 files changed, 0 insertions(+), 0 deletions(-)
 rename dsLightRag/WxGzh/{T1_Login.py => T1_LoginGetCookie.py} (100%)
 rename dsLightRag/WxGzh/{T2_GetArticleList.py => T2_CollectArticle.py} (100%)

diff --git a/dsLightRag/WxGzh/T1_Login.py b/dsLightRag/WxGzh/T1_LoginGetCookie.py
similarity index 100%
rename from dsLightRag/WxGzh/T1_Login.py
rename to dsLightRag/WxGzh/T1_LoginGetCookie.py
diff --git a/dsLightRag/WxGzh/T2_GetArticleList.py b/dsLightRag/WxGzh/T2_CollectArticle.py
similarity index 100%
rename from dsLightRag/WxGzh/T2_GetArticleList.py
rename to dsLightRag/WxGzh/T2_CollectArticle.py

From 67c1296dc1729aa72e89dbc42e78945c208e5f48 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:55:49 +0800
Subject: [PATCH 36/46] 'commit'

---
 dsLightRag/WxGzh/article_urls.txt | 22 ----------------------
 1 file changed, 22 deletions(-)
 delete mode 100644 dsLightRag/WxGzh/article_urls.txt

diff --git a/dsLightRag/WxGzh/article_urls.txt b/dsLightRag/WxGzh/article_urls.txt
deleted file mode 100644
index dc2077eb..00000000
--- a/dsLightRag/WxGzh/article_urls.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-长春中考上演“神仙打架”！省二力旺等五校过半考生超700分！ 2025-07-14 18:36:34 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd
-独家专访赫行学校2025年中考“双黄蛋”！学霸靠啥杀出重围？ 2025-07-14 18:36:34 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=2&sn=c7733f7c2c6331e51e55af695f99a43e&chksm=84e1ab0cb396221a7d185dcb99acc9dce45cc5c66c3eef42680a215b710bb9bfa9fd10da4419#rd
-长春40所学校中考成绩曝光！700+成批涌现！谁是最大黑马？ 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=1&sn=1f314640ae6eec236b0e16271bd44362&chksm=84e1ab1db396220b73ae08898a026d887436501a6c42abe01d7fa4aef9063533fad89720d3b8#rd
-喜报！长春外国语学校女子篮球队夺得冠军！ 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=2&sn=31651043acb6ecbf4232e92e635196b6&chksm=84e1ab1db396220b0810c3bdf332128b110d1902658f2556eaeff67cec084a8a068a5ae9a275#rd
-“趣闯盛夏·探无界”！探秘一实验银河小学夏令营 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=3&sn=8edf6ce8cebdaad55343b39639876c27&chksm=84e1ab1db396220b26b172b3b565f919f7ded4c2a5b78227294ea29a558a7666c33b8c1de660#rd
-刚刚！2025年长春中考各批次控制线公布！ 2025-07-12 10:04:32 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526196&idx=1&sn=282e5e824410a9a92a83dd800cb58a7c&chksm=84e1aba6b39622b03fe6422032474c9696f83541d9ff9b8b6a9f0f099ce459da430f720d05e4#rd
-重磅消息！师大附属实验学校（经开）校长有新任命！ 2025-07-12 10:04:32 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526196&idx=2&sn=9449c87935faf86ddcc5a674ea888913&chksm=84e1aba6b39622b03fd8413ff1e74b61f662ec8deb3887c2c5b8e5ad15470b15ae14b21e94ea#rd
-市教育局最新发布！长春2025年中考成绩将于7月12日公布！ 2025-07-11 15:22:13 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526176&idx=1&sn=a5b4104d2fe74ace32ab31faf5f1c44c&chksm=84e1abb2b39622a40b0e0969e84fb00c753cc8ffefb8624726afa2a7352ea725c7f967bf25f5#rd
-长春市第十九中学2025年职称评聘拟通过人员名单的公示！有你认识的老师吗？ 2025-07-11 15:22:13 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526176&idx=2&sn=693d13964e4be18718c0eb4fd13ba68f&chksm=84e1abb2b39622a442a9f7cea8ddc72820050b2896968f2d0ae283c7caca2dbe014a721feb2e#rd
-高分喜报频传！长春这所小学靠啥成为“学霸制造机”？ 2025-07-10 19:00:00 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526158&idx=1&sn=a0af7f484d6a3300a9b7f3d787a2594d&chksm=84e1ab9cb396228a56420696eb09071ff829d58e8e31bd652f849f3cbd0ee276b0baad7a1e89#rd
-蝉联冠军！吉大尚德游泳队斩获骄人成绩！ 2025-07-10 19:00:00 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526158&idx=2&sn=cabb682e99978bf2a58ff0e9e06dc53d&chksm=84e1ab9cb396228a5cd457cd7ee0728491e6b3dc34fbde02240624364cfa8a9e2c533052d2b4#rd
-长春中考上演“神仙打架”！省二力旺等五校过半考生超700分！ 2025-07-14 18:36:34 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=1&sn=41f941e481be6a7ccd26ad734c8d7a13&chksm=84e1ab0cb396221a59bce5c4ee842c9326968daf4aea1c7d24e55ed8879789c3ef34a7ce5ed1#rd
-独家专访赫行学校2025年中考“双黄蛋”！学霸靠啥杀出重围？ 2025-07-14 18:36:34 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526302&idx=2&sn=c7733f7c2c6331e51e55af695f99a43e&chksm=84e1ab0cb396221a7d185dcb99acc9dce45cc5c66c3eef42680a215b710bb9bfa9fd10da4419#rd
-长春40所学校中考成绩曝光！700+成批涌现！谁是最大黑马？ 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=1&sn=1f314640ae6eec236b0e16271bd44362&chksm=84e1ab1db396220b73ae08898a026d887436501a6c42abe01d7fa4aef9063533fad89720d3b8#rd
-喜报！长春外国语学校女子篮球队夺得冠军！ 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=2&sn=31651043acb6ecbf4232e92e635196b6&chksm=84e1ab1db396220b0810c3bdf332128b110d1902658f2556eaeff67cec084a8a068a5ae9a275#rd
-“趣闯盛夏·探无界”！探秘一实验银河小学夏令营 2025-07-13 18:48:27 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526287&idx=3&sn=8edf6ce8cebdaad55343b39639876c27&chksm=84e1ab1db396220b26b172b3b565f919f7ded4c2a5b78227294ea29a558a7666c33b8c1de660#rd
-刚刚！2025年长春中考各批次控制线公布！ 2025-07-12 10:04:32 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526196&idx=1&sn=282e5e824410a9a92a83dd800cb58a7c&chksm=84e1aba6b39622b03fe6422032474c9696f83541d9ff9b8b6a9f0f099ce459da430f720d05e4#rd
-重磅消息！师大附属实验学校（经开）校长有新任命！ 2025-07-12 10:04:32 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526196&idx=2&sn=9449c87935faf86ddcc5a674ea888913&chksm=84e1aba6b39622b03fd8413ff1e74b61f662ec8deb3887c2c5b8e5ad15470b15ae14b21e94ea#rd
-市教育局最新发布！长春2025年中考成绩将于7月12日公布！ 2025-07-11 15:22:13 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526176&idx=1&sn=a5b4104d2fe74ace32ab31faf5f1c44c&chksm=84e1abb2b39622a40b0e0969e84fb00c753cc8ffefb8624726afa2a7352ea725c7f967bf25f5#rd
-长春市第十九中学2025年职称评聘拟通过人员名单的公示！有你认识的老师吗？ 2025-07-11 15:22:13 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526176&idx=2&sn=693d13964e4be18718c0eb4fd13ba68f&chksm=84e1abb2b39622a442a9f7cea8ddc72820050b2896968f2d0ae283c7caca2dbe014a721feb2e#rd
-高分喜报频传！长春这所小学靠啥成为“学霸制造机”？ 2025-07-10 19:00:00 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526158&idx=1&sn=a0af7f484d6a3300a9b7f3d787a2594d&chksm=84e1ab9cb396228a56420696eb09071ff829d58e8e31bd652f849f3cbd0ee276b0baad7a1e89#rd
-蝉联冠军！吉大尚德游泳队斩获骄人成绩！ 2025-07-10 19:00:00 http://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==&mid=2651526158&idx=2&sn=cabb682e99978bf2a58ff0e9e06dc53d&chksm=84e1ab9cb396228a5cd457cd7ee0728491e6b3dc34fbde02240624364cfa8a9e2c533052d2b4#rd

From 73033e0333f772bd07e74693307f8cc00aef8fe0 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 11:57:09 +0800
Subject: [PATCH 37/46] 'commit'

---
 dsLightRag/Start.py                |  7 +--
 dsLightRag/T1_Train.py             |  6 ---
 dsLightRag/WxGzh/T3_TrainIntoKG.py | 68 ++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 12 deletions(-)
 create mode 100644 dsLightRag/WxGzh/T3_TrainIntoKG.py

diff --git a/dsLightRag/Start.py b/dsLightRag/Start.py
index 584315f5..a6b09126 100644
--- a/dsLightRag/Start.py
+++ b/dsLightRag/Start.py
@@ -17,13 +17,8 @@ from starlette.staticfiles import StaticFiles
 from Util.LightRagUtil import *
 from Util.PostgreSQLUtil import init_postgres_pool
 
-# 在程序开始时添加以下配置
-logging.basicConfig(
-    level=logging.INFO,  # 设置日志级别为INFO
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
 
-# 或者如果你想更详细地控制日志输出
+# 想更详细地控制日志输出
 logger = logging.getLogger('lightrag')
 logger.setLevel(logging.INFO)
 handler = logging.StreamHandler()
diff --git a/dsLightRag/T1_Train.py b/dsLightRag/T1_Train.py
index 1db08183..88080efa 100644
--- a/dsLightRag/T1_Train.py
+++ b/dsLightRag/T1_Train.py
@@ -4,12 +4,6 @@ import logging
 from Util.DocxUtil import get_docx_content_by_pandoc
 from Util.LightRagUtil import initialize_pg_rag
 
-# 在程序开始时添加以下配置
-logging.basicConfig(
-    level=logging.INFO,  # 设置日志级别为INFO
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-
 # 或者如果你想更详细地控制日志输出
 logger = logging.getLogger('lightrag')
 logger.setLevel(logging.INFO)
diff --git a/dsLightRag/WxGzh/T3_TrainIntoKG.py b/dsLightRag/WxGzh/T3_TrainIntoKG.py
new file mode 100644
index 00000000..c7de823c
--- /dev/null
+++ b/dsLightRag/WxGzh/T3_TrainIntoKG.py
@@ -0,0 +1,68 @@
+import asyncio
+import logging
+
+from Util.DocxUtil import get_docx_content_by_pandoc
+from Util.LightRagUtil import initialize_pg_rag
+
+
+logger = logging.getLogger('lightrag')
+logger.setLevel(logging.INFO)
+handler = logging.StreamHandler()
+handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+logger.addHandler(handler)
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
+
+# 使用PG库后，这个是没有用的,但目前的项目代码要求必传，就写一个吧。
+WORKING_DIR = f"./output"
+
+#### 下面两个要注意写清楚内容  ####
+# 1、工作空间【知识库名称】
+# 2、文档名称【不允许出现重复，因为后面需要以此为条件查询】
+tasks = [
+    # {  # 苏轼
+    #     "workspace": "SuShi", "docx_name": "苏轼.docx",
+    # },
+    # {  # 化学
+    #     "workspace": "Chemistry", "docx_name": "Chemistry.docx",
+    # },
+    # {  # 几何
+    #     "workspace": "JiHe", "docx_name": "JiHe.docx",
+    # },
+    # {  # 数学
+    #     "workspace": "Math", "docx_name": "Math.docx",
+    # },
+    # {  # 史记
+    #     "workspace": "ShiJi", "docx_name": "少年读史记张嘉骅.docx",
+    # },
+    # {  # 长春市一批次高中学校介绍
+    #     "workspace": "ChangChun", "docx_name": "长春市一批次高中学校介绍.docx",
+    # },
+    # {  # 2024长春43所高中录取分数线
+    #     "workspace": "ChangChun", "docx_name": "2024长春43所高中录取分数线.docx",
+    # },
+    {  # 长春市2025年中考各批次录取最低控制线
+        "workspace": "ChangChun", "docx_name": "长春市2025年中考各批次录取最低控制线.docx",
+    }
+]
+for task in tasks:
+    task["docx_path"] = "./static/Txt/" + task["docx_name"]  # 3、文档路径 python是按引用传递的&
+
+
+async def main():
+    for task in tasks:
+        workspace = task["workspace"]
+        docx_name = task["docx_name"]
+        docx_path = task["docx_path"]
+        logger.info(f"开始处理文档: {docx_name}" + ",共%s个文档,当前是第%s个。", len(tasks), tasks.index(task) + 1)
+        try:
+            rag = await initialize_pg_rag(WORKING_DIR=WORKING_DIR, workspace=workspace)
+            # 获取docx文件的内容
+            content = get_docx_content_by_pandoc(docx_path)
+            await rag.ainsert(input=content, file_paths=[docx_name])  # 添加来源参数
+        finally:
+            if rag:
+                await rag.finalize_storages()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

From f639b4c9fc23720f155bc9f8e42612110f4a1b4c Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 13:02:12 +0800
Subject: [PATCH 38/46] 'commit'

---
 .../__pycache__/LightRagUtil.cpython-310.pyc  | Bin 4511 -> 4511 bytes
 dsLightRag/WxGzh/T3_TrainIntoKG.py            |  77 ++++++++----------
 2 files changed, 36 insertions(+), 41 deletions(-)

diff --git a/dsLightRag/Util/__pycache__/LightRagUtil.cpython-310.pyc b/dsLightRag/Util/__pycache__/LightRagUtil.cpython-310.pyc
index 9bc1e54dac9dbe973c4131d5b80ceb60cf4263d8..d89e8018a6fc1ad3abe25bb0951aa47b9d96253a 100644
GIT binary patch
delta 1003
zcmYL|OK%cU6o8om!<1nL+NOz!u@8bu<D-wn`05*qEiKT}3cWriQ<&1R%nV)zYU4(i
zCb}?pZ#AyEG;wEKx#&O81RIyeKj6ZJiRTPO3Hi9^anIwP!$I^QDq51{cfse+41F(t
zbaR9qxyOLN-ShoKTF$1FmD!>)x4E9o>n_(D7h$WO7hITq_bhOyS<E|eHUhWl89+<e
z)~lMS8X6G{_3_-kt<?!@dM}85=rVv9WQX2y2!HH7Cl7;y_^MW=qET=R@#EYuo979b
zU-OaT5c&&%mbYr{X*A9gvF)rlIf5sGZq{wpENM<CrL0pTfknYul$vVqv_&XL*=kqR
z<9ijjhH-hS*$ry4tk};!`=0R^G2<n+BwXij;_()HE-Z6N_C;9XCs8rQ;@wl+0vnV1
z*pu$_T$;V>ei$XejArjybhntdEvi<uLpPB$V@oTSb+ZB$8HA1cV?#;IDOIRi+fnLm
zPnrf@T%cVTJ;|E>5s3NLKgefLk!45zRc@VyMSJuHI(n!F_0xzM#4KVC5&DN7JSEvD
zaT7`(lOpV=D6^S>%x$o2z!FnvUPLT8eH;gF3(FwJIm)xyo>MF#jc^5KNfns#SLx|h
zG!ikFO5ZtYkM!nJ39FR1Rl8B|=;T}~g`IU|_EH|W;&g#+6S-P76vHZO)%NiE{>wNS
zwZ3nb2>T|7Vfeq~SOMRK_;;I~ha$aYs{{h3?C5CLiQLpm@R%JXiKq?RQVi9EL3EUO
zMRm6EQ7|@%1-0Lh$5Rfmiom3F4dDjF-7T?@T}v-!Qc7ZRjVfr!J8$+QxRg-Qr7M$(
zbMU2Ml%g~aKhP_zG)(?-$uhU~3dOZ*dCi7V+i!K5HR}8uaN>m7p=oINLui6H3+b-g
zlsaqTsp@sR<ul7FrRsjWvURvZHxLCx5zz8!W>cq@NjFhN5O)#x5cd%(4E4cZz`zz%

delta 1003
zcmYL|$xjne7{EJBI~_WmX#q(jgvB%kiixsB5)UpwwXAIc%Nm_b+hIB|OL?6Z8!vJ+
z(S!N!qVdGh#GCQtf&W00C-G?f2RwN5eN(DU^XvPT_pPs|iPMDS%5o&YKG(aCK1iQ<
zj>0)V$@qt#@1^oeE~Bo_ZmV-Uj~;B8fxz1Ug|*;IZUBA+A990`3{4LXvD;*rfr|U4
zr8}CfQ_<F*&NV%~MxhZJldhmk5-|d2p(z&rA~d3m;hnJM8ir1oV_cZx#$aC9W#%_R
z|Mn>Qi|7-w++&@j1S++hRX<1UQZSvGr#U6v52cn(LS<%Au@<$ed6kyP3Q~)$in_wD
zqYD^UAi7s44&<a>_!2%5Zehk5SQ2jw%XnOY7vc(6fUn}Ju!@Q-q&n|%n=mQ&z_ZR#
zZV%pf9wcZit9!>TIo#gxT%sBJ8Bb;1F6!m7=@_gco5DmSd98psWrJwdidt)Vl0BwN
ziKGLg7oZUtXEEPJh6N23ML3VFaR&5B-oz3*21yY0IYb_@hFC|$|Dg*{1^6uOu+k^x
ze)uIRa6hVW705+hX$#Feh+V&ri|B&*n29Nl2r%1q71HuJcL1(zuq}U+pU<F?O8Lz4
z=4E@dJD*9rr43K>>b15`$!9XySzCrz%Fs=}3%_l}vTW6M%evKCUeA9SH>1^>PKm;I
zrH?KDx02k#xzNaAqdh3vU3N=Mz?5wrDWa3o&`WHXZ6)p3>Yl6Gn!^^+R^k=Se~m}6
z<Q*)iHA4YUB}5s4Nr{f&8B#ozwsPysi`k5tUR);)H0=8``x#qGpP)-eHk;zumkgkk
zAXDszD)=zj=5Lf-XWujkJ}Vk>>UOnBJDe)<{|&ft3SRskYxrY)de?uDe9a@opI6W_
sYaR_dWsMN6*{aN7S4ahMfH=emVclt%#C3>;DjVS-TtpRd!j}5vFY3lC1ONa4

diff --git a/dsLightRag/WxGzh/T3_TrainIntoKG.py b/dsLightRag/WxGzh/T3_TrainIntoKG.py
index c7de823c..86473413 100644
--- a/dsLightRag/WxGzh/T3_TrainIntoKG.py
+++ b/dsLightRag/WxGzh/T3_TrainIntoKG.py
@@ -3,7 +3,7 @@ import logging
 
 from Util.DocxUtil import get_docx_content_by_pandoc
 from Util.LightRagUtil import initialize_pg_rag
-
+from Util.PostgreSQLUtil import init_postgres_pool
 
 logger = logging.getLogger('lightrag')
 logger.setLevel(logging.INFO)
@@ -15,54 +15,49 @@ logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
 # 使用PG库后，这个是没有用的,但目前的项目代码要求必传，就写一个吧。
 WORKING_DIR = f"./output"
 
-#### 下面两个要注意写清楚内容  ####
-# 1、工作空间【知识库名称】
-# 2、文档名称【不允许出现重复，因为后面需要以此为条件查询】
-tasks = [
-    # {  # 苏轼
-    #     "workspace": "SuShi", "docx_name": "苏轼.docx",
-    # },
-    # {  # 化学
-    #     "workspace": "Chemistry", "docx_name": "Chemistry.docx",
-    # },
-    # {  # 几何
-    #     "workspace": "JiHe", "docx_name": "JiHe.docx",
-    # },
-    # {  # 数学
-    #     "workspace": "Math", "docx_name": "Math.docx",
-    # },
-    # {  # 史记
-    #     "workspace": "ShiJi", "docx_name": "少年读史记张嘉骅.docx",
-    # },
-    # {  # 长春市一批次高中学校介绍
-    #     "workspace": "ChangChun", "docx_name": "长春市一批次高中学校介绍.docx",
-    # },
-    # {  # 2024长春43所高中录取分数线
-    #     "workspace": "ChangChun", "docx_name": "2024长春43所高中录取分数线.docx",
-    # },
-    {  # 长春市2025年中考各批次录取最低控制线
-        "workspace": "ChangChun", "docx_name": "长春市2025年中考各批次录取最低控制线.docx",
-    }
-]
-for task in tasks:
-    task["docx_path"] = "./static/Txt/" + task["docx_name"]  # 3、文档路径 python是按引用传递的&
 
+async def get_unprocessed_articles():
+    """Return every t_wechat_articles row with is_finish = 0 as a list of dicts (id, source, title, content)."""
+    pool = await init_postgres_pool()  # outside the try: if this raises there is no pool to close
+    try:
+        async with pool.acquire() as conn:
+            rows = await conn.fetch('''
+                SELECT id, source, title, content 
+                FROM t_wechat_articles 
+                WHERE is_finish = 0
+            ''')
+            return [dict(row) for row in rows]
+    finally:
+        await pool.close()
 
 async def main():
-    for task in tasks:
-        workspace = task["workspace"]
-        docx_name = task["docx_name"]
-        docx_path = task["docx_path"]
-        logger.info(f"开始处理文档: {docx_name}" + ",共%s个文档,当前是第%s个。", len(tasks), tasks.index(task) + 1)
+    # Fetch the articles that have not been ingested yet
+    articles = await get_unprocessed_articles()
+    logger.info(f"共获取到{len(articles)}篇未处理的文章")
+
+    for article in articles:
+        workspace = 'ChangChun'
+        docx_name = f"{article['source']}_{article['title']}"  # source + title doubles as the document name
+        content = article["content"]  # article body that gets inserted
+        rag = pool = None  # pre-bind per iteration: the finally below tests both, even when setup raises
+        logger.info(f"开始处理文档: {docx_name}")
         try:
             rag = await initialize_pg_rag(WORKING_DIR=WORKING_DIR, workspace=workspace)
-            # 获取docx文件的内容
-            content = get_docx_content_by_pandoc(docx_path)
-            await rag.ainsert(input=content, file_paths=[docx_name])  # 添加来源参数
+            await rag.ainsert(input=content, file_paths=[docx_name])
+            
+            # 标记为已处理
+            pool = await init_postgres_pool()
+            async with pool.acquire() as conn:
+                await conn.execute('''
+                    UPDATE t_wechat_articles 
+                    SET is_finish = 1 
+                    WHERE id = $1
+                ''', article["id"])
         finally:
             if rag:
                 await rag.finalize_storages()
-
+            if pool:
+                await pool.close()
 
 if __name__ == "__main__":
     asyncio.run(main())

From bd7399b14727276673cdacd181a63c5a9775f0c9 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 13:05:20 +0800
Subject: [PATCH 39/46] 'commit'

---
 .../postgres_impl.py                                          | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/dsLightRag/Doc/9、Postgresql支持工作空间的代码修改/postgres_impl.py b/dsLightRag/Doc/9、Postgresql支持工作空间的代码修改/postgres_impl.py
index f02ad79f..d18c6cc0 100644
--- a/dsLightRag/Doc/9、Postgresql支持工作空间的代码修改/postgres_impl.py
+++ b/dsLightRag/Doc/9、Postgresql支持工作空间的代码修改/postgres_impl.py
@@ -965,8 +965,8 @@ class PGDocStatusStorage(DocStatusStorage):
             else:
                 exist_keys = []
             new_keys = set([s for s in keys if s not in exist_keys])
-            print(f"keys: {keys}")
-            print(f"new_keys: {new_keys}")
+            #print(f"keys: {keys}")
+            #print(f"new_keys: {new_keys}")
             return new_keys
         except Exception as e:
             logger.error(

From db7f327f7b6217f7aa6a8960ac2208a331bb9137 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 13:29:17 +0800
Subject: [PATCH 40/46] 'commit'

---
 dsLightRag/static/ChangChun.html | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/dsLightRag/static/ChangChun.html b/dsLightRag/static/ChangChun.html
index ac938d40..3a3b6666 100644
--- a/dsLightRag/static/ChangChun.html
+++ b/dsLightRag/static/ChangChun.html
@@ -200,13 +200,19 @@
                     <div class="example-item" onclick="fillExample('2025年各批次最低分数线是多少？')">
                         2025年各批次最低分数线是多少?
                     </div>
-                     <div class="example-item" onclick="fillExample('介绍一下师大自由校区？')">
+                    <div class="example-item" onclick="fillExample('介绍一下师大自由校区？')">
                         介绍一下师大自由校区?
                     </div>
                     <div class="example-item" onclick="fillExample('今年中考成绩690分，能上哪个高中呢？')">
                         今年中考成绩690分，能上哪个高中呢？
                     </div>
 
+                    <div class="example-item" onclick="fillExample('通达小学介绍')">
+                        通达小学介绍
+                    </div>
+                    <div class="example-item" onclick="fillExample('师大附属实验学校的马校长')">
+                        师大附属实验学校的马校长
+                    </div>
                 </div>
             </div>
         </div>

From 564a76f3f237091c6a6dcaf56173de8f3be9a90d Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 13:31:00 +0800
Subject: [PATCH 41/46] 'commit'

---
 dsLightRag/static/ChangChun.html | 4 ++--
 dsLightRag/static/ai.html        | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dsLightRag/static/ChangChun.html b/dsLightRag/static/ChangChun.html
index 3a3b6666..7510be6f 100644
--- a/dsLightRag/static/ChangChun.html
+++ b/dsLightRag/static/ChangChun.html
@@ -3,7 +3,7 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>【长春市中考报考知识库】</title>
+    <title>【长春市教育信息资讯库】</title>
     <link rel="icon" href="data:,">
     <style>
         body {
@@ -176,7 +176,7 @@
 </head>
 <body>
 <div class="container">
-    <h1>【长春市中考报考知识库】</h1>
+    <h1>【长春市教育信息资讯库】</h1>
     <div class="data-area" id="answerArea">
         <div style="color:#666; padding:20px; text-align:center;">
             <p>请在下方输入您的问题，答案将在此处显示</p>
diff --git a/dsLightRag/static/ai.html b/dsLightRag/static/ai.html
index d0d0bdb7..17c0e247 100644
--- a/dsLightRag/static/ai.html
+++ b/dsLightRag/static/ai.html
@@ -176,7 +176,7 @@
 <body>
 <div class="container">
     <h1>【东师理想】教育大模型</h1>
-    <center><h3><a href="ChangChun.html">体验一、长春市中考报考知识库</a> <br><br><a href="ShiJi.html">体验二、关系图谱生成</a></h3></center>
+    <center><h3><a href="ChangChun.html">体验一、长春市教育信息资讯库</a> <br><br><a href="ShiJi.html">体验二、关系图谱生成</a></h3></center>
     <div class="data-area" id="answerArea">
         <div style="color:#666; padding:20px; text-align:center;">
             <p>请在下方输入您的问题，答案将在此处显示</p>

From 321536a178da8b953f3a76c75b45ff4d7a362df8 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 13:32:12 +0800
Subject: [PATCH 42/46] 'commit'

---
 dsLightRag/static/ChangChun.html | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dsLightRag/static/ChangChun.html b/dsLightRag/static/ChangChun.html
index 7510be6f..448bf325 100644
--- a/dsLightRag/static/ChangChun.html
+++ b/dsLightRag/static/ChangChun.html
@@ -177,6 +177,7 @@
 <body>
 <div class="container">
     <h1>【长春市教育信息资讯库】</h1>
+    <center><a href="#"><h3>资讯库维护</h3></a></center>
     <div class="data-area" id="answerArea">
         <div style="color:#666; padding:20px; text-align:center;">
             <p>请在下方输入您的问题，答案将在此处显示</p>

From 55d310f0145ad709383b580fa01b14549bc2f7dc Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 13:35:10 +0800
Subject: [PATCH 43/46] 'commit'

---
 dsLightRag/static/ChangChun.html        |  2 +-
 dsLightRag/static/ChangChunManager.html | 68 +++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 dsLightRag/static/ChangChunManager.html

diff --git a/dsLightRag/static/ChangChun.html b/dsLightRag/static/ChangChun.html
index 448bf325..146411a3 100644
--- a/dsLightRag/static/ChangChun.html
+++ b/dsLightRag/static/ChangChun.html
@@ -177,7 +177,7 @@
 <body>
 <div class="container">
     <h1>【长春市教育信息资讯库】</h1>
-    <center><a href="#"><h3>资讯库维护</h3></a></center>
+    <center><a href="ChangChunManager.html"><h3>资讯库维护</h3></a></center>
     <div class="data-area" id="answerArea">
         <div style="color:#666; padding:20px; text-align:center;">
             <p>请在下方输入您的问题，答案将在此处显示</p>
diff --git a/dsLightRag/static/ChangChunManager.html b/dsLightRag/static/ChangChunManager.html
new file mode 100644
index 00000000..ade4b8dd
--- /dev/null
+++ b/dsLightRag/static/ChangChunManager.html
@@ -0,0 +1,68 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>长春市信息资讯库管理系统</title>
+    <link rel="stylesheet" href="layui/css/layui.css">
+</head>
+<body>
+    <div class="layui-container">
+        <div class="layui-row">
+            <div class="layui-col-md12">
+                <h2 class="layui-header">长春市信息资讯库管理系统</h2>
+            </div>
+        </div>
+        
+        <div class="layui-tab layui-tab-brief">
+            <ul class="layui-tab-title">
+                <li class="layui-this">信息来源</li>
+                <li>文章列表</li>
+            </ul>
+            <div class="layui-tab-content">
+                <div class="layui-tab-item layui-show">
+                    <table id="sourceTable" lay-filter="sourceTable"></table>
+                </div>
+                <div class="layui-tab-item">
+                    <table id="articleTable" lay-filter="articleTable"></table>
+                </div>
+            </div>
+        </div>
+    </div>
+
+    <script src="layui/layui.js"></script>
+    <script>
+        layui.use(['table', 'element'], function(){
+            var table = layui.table;
+            var element = layui.element;
+            
+            // 信息来源表格
+            table.render({
+                elem: '#sourceTable',
+                url: '/api/sources',
+                page: true,
+                cols: [[
+                    {field: 'id', title: 'ID', width:80},
+                    {field: 'name', title: '来源名称'},
+                    {field: 'type', title: '类型'},
+                    {field: 'update_time', title: '更新时间'}
+                ]]
+            });
+            
+            // 文章列表表格
+            table.render({
+                elem: '#articleTable',
+                url: '/api/articles',
+                page: true,
+                cols: [[
+                    {field: 'id', title: 'ID', width:80},
+                    {field: 'title', title: '标题'},
+                    {field: 'source', title: '来源'},
+                    {field: 'publish_date', title: '发布日期'},
+                    {field: 'collect_time', title: '采集时间'}
+                ]]
+            });
+        });
+    </script>
+</body>
+</html>
\ No newline at end of file

From 054da4899184dfe5fd2f8e1f6dc5ab694239b3c9 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 13:45:03 +0800
Subject: [PATCH 44/46] 'commit'

---
 dsLightRag/Start.py                     | 86 +++++++++++++++++++++++++
 dsLightRag/static/ChangChun.html        |  2 +-
 dsLightRag/static/ChangChunManager.html |  8 +--
 3 files changed, 91 insertions(+), 5 deletions(-)

diff --git a/dsLightRag/Start.py b/dsLightRag/Start.py
index a6b09126..4c60c69f 100644
--- a/dsLightRag/Start.py
+++ b/dsLightRag/Start.py
@@ -295,5 +295,91 @@ async def render_html(request: fastapi.Request):
     }
 
 
+@app.get("/api/sources")
+async def get_sources(page: int = 1, limit: int = 10):
+    """Return one page of t_wechat_source rows, newest first; any exception becomes {"code": 1, "msg": ...}."""
+    try:
+        # NOTE(review): a pool is requested on every call and never closed here -- confirm init_postgres_pool() returns a shared pool
+        pg_pool = await init_postgres_pool()
+        async with pg_pool.acquire() as conn:
+            # Total row count, returned next to the page so the client can paginate
+            total = await conn.fetchval("SELECT COUNT(*) FROM t_wechat_source")
+            # Rows of the requested page (page is 1-based)
+            offset = (page - 1) * limit
+            rows = await conn.fetch(
+                """
+                SELECT id, account_id,account_name, created_at 
+                FROM t_wechat_source 
+                ORDER BY created_at DESC 
+                LIMIT $1 OFFSET $2
+                """,
+                limit, offset
+            )
+            sources = [
+                {
+                    "id": row[0],
+                    "name": row[1],  # account_id column -- NOTE(review): the UI labels this field as the account name; confirm
+                    "type": row[2],  # account_name column -- NOTE(review): the UI labels this field as the source type; confirm
+                    "update_time": row[3].strftime("%Y-%m-%d %H:%M:%S") if row[3] else None
+                }
+                for row in rows
+            ]
+            return {
+                "code": 0,
+                "data": {
+                    "list": sources,
+                    "total": total,
+                    "page": page,
+                    "limit": limit
+                }
+            }
+    except Exception as e:
+        return {"code": 1, "msg": str(e)}
+
+
+@app.get("/api/articles")
+async def get_articles(page: int = 1, limit: int = 10):
+    try:
+        pg_pool = await init_postgres_pool()
+        async with pg_pool.acquire() as conn:
+            # 获取总数
+            total = await conn.fetchval("SELECT COUNT(*) FROM t_wechat_articles")
+            # 获取分页数据
+            offset = (page - 1) * limit
+            rows = await conn.fetch(
+                """
+                SELECT a.id, a.title, a.source as source as name, 
+                       a.publish_time, a.collection_time
+                FROM t_wechat_articles a
+                ORDER BY a.collection_time DESC 
+                LIMIT $1 OFFSET $2
+                """,
+                limit, offset
+            )
+            
+            articles = [
+                {
+                    "id": row[0],
+                    "title": row[1],
+                    "source": row[2],
+                    "publish_date": row[3].strftime("%Y-%m-%d") if row[3] else None,
+                    "collect_time": row[4].strftime("%Y-%m-%d %H:%M:%S") if row[4] else None
+                }
+                for row in rows
+            ]
+            
+            return {
+                "code": 0,
+                "data": {
+                    "list": articles,
+                    "total": total,
+                    "page": page,
+                    "limit": limit
+                }
+            }
+    except Exception as e:
+        return {"code": 1, "msg": str(e)}
+
+
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/dsLightRag/static/ChangChun.html b/dsLightRag/static/ChangChun.html
index 146411a3..a4105b5f 100644
--- a/dsLightRag/static/ChangChun.html
+++ b/dsLightRag/static/ChangChun.html
@@ -177,7 +177,7 @@
 <body>
 <div class="container">
     <h1>【长春市教育信息资讯库】</h1>
-    <center><a href="ChangChunManager.html"><h3>资讯库维护</h3></a></center>
+    <center><a href="ChangChunManager.html" target="_blank"><h3>资讯库维护</h3></a></center>
     <div class="data-area" id="answerArea">
         <div style="color:#666; padding:20px; text-align:center;">
             <p>请在下方输入您的问题，答案将在此处显示</p>
diff --git a/dsLightRag/static/ChangChunManager.html b/dsLightRag/static/ChangChunManager.html
index ade4b8dd..b29c1cfe 100644
--- a/dsLightRag/static/ChangChunManager.html
+++ b/dsLightRag/static/ChangChunManager.html
@@ -3,14 +3,14 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>长春市信息资讯库管理系统</title>
+    <title>长春市教育信息资讯库维护</title>
     <link rel="stylesheet" href="layui/css/layui.css">
 </head>
 <body>
     <div class="layui-container">
         <div class="layui-row">
             <div class="layui-col-md12">
-                <h2 class="layui-header">长春市信息资讯库管理系统</h2>
+                <h2 class="layui-header">长春市教育信息资讯库维护</h2>
             </div>
         </div>
         
@@ -43,8 +43,8 @@
                 page: true,
                 cols: [[
                     {field: 'id', title: 'ID', width:80},
-                    {field: 'name', title: '来源名称'},
-                    {field: 'type', title: '类型'},
+                    {field: 'name', title: '简称'},
+                    {field: 'type', title: '名称'},
                     {field: 'update_time', title: '更新时间'}
                 ]]
             });

From b6cfd9646bb823e81e5f71de67133bfca8c6c2be Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 13:46:49 +0800
Subject: [PATCH 45/46] 'commit'

---
 dsLightRag/static/ChangChunManager.html | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/dsLightRag/static/ChangChunManager.html b/dsLightRag/static/ChangChunManager.html
index b29c1cfe..a5193349 100644
--- a/dsLightRag/static/ChangChunManager.html
+++ b/dsLightRag/static/ChangChunManager.html
@@ -41,10 +41,18 @@
                 elem: '#sourceTable',
                 url: '/api/sources',
                 page: true,
+                parseData: function(res) { // 新增parseData函数处理返回数据
+                    return {
+                        "code": res.code,
+                        "msg": res.msg,
+                        "count": res.data.total,
+                        "data": res.data.list
+                    };
+                },
                 cols: [[
                     {field: 'id', title: 'ID', width:80},
-                    {field: 'name', title: '简称'},
-                    {field: 'type', title: '名称'},
+                    {field: 'name', title: '账号名称'},
+                    {field: 'type', title: '来源类型'},
                     {field: 'update_time', title: '更新时间'}
                 ]]
             });
@@ -54,6 +62,14 @@
                 elem: '#articleTable',
                 url: '/api/articles',
                 page: true,
+                parseData: function(res) {
+                    return {
+                        "code": res.code,
+                        "msg": res.msg,
+                        "count": res.data.total,
+                        "data": res.data.list
+                    };
+                },
                 cols: [[
                     {field: 'id', title: 'ID', width:80},
                     {field: 'title', title: '标题'},

From 81619320481d000f3ccf215c14619f9a29cf70ec Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Tue, 15 Jul 2025 14:01:24 +0800
Subject: [PATCH 46/46] 'commit'

---
 dsLightRag/Start.py                     |   7 +-
 dsLightRag/static/ChangChunManager.html | 136 ++++++++++++------------
 2 files changed, 73 insertions(+), 70 deletions(-)

diff --git a/dsLightRag/Start.py b/dsLightRag/Start.py
index 4c60c69f..26479237 100644
--- a/dsLightRag/Start.py
+++ b/dsLightRag/Start.py
@@ -348,8 +348,8 @@ async def get_articles(page: int = 1, limit: int = 10):
             offset = (page - 1) * limit
             rows = await conn.fetch(
                 """
-                SELECT a.id, a.title, a.source as source as name, 
-                       a.publish_time, a.collection_time
+                SELECT a.id, a.title, a.source as name, 
+                       a.publish_time, a.collection_time,a.url
                 FROM t_wechat_articles a
                 ORDER BY a.collection_time DESC 
                 LIMIT $1 OFFSET $2
@@ -363,7 +363,8 @@ async def get_articles(page: int = 1, limit: int = 10):
                     "title": row[1],
                     "source": row[2],
                     "publish_date": row[3].strftime("%Y-%m-%d") if row[3] else None,
-                    "collect_time": row[4].strftime("%Y-%m-%d %H:%M:%S") if row[4] else None
+                    "collect_time": row[4].strftime("%Y-%m-%d %H:%M:%S") if row[4] else None,
+                    "url": row[5],
                 }
                 for row in rows
             ]
diff --git a/dsLightRag/static/ChangChunManager.html b/dsLightRag/static/ChangChunManager.html
index a5193349..f0498218 100644
--- a/dsLightRag/static/ChangChunManager.html
+++ b/dsLightRag/static/ChangChunManager.html
@@ -7,78 +7,80 @@
     <link rel="stylesheet" href="layui/css/layui.css">
 </head>
 <body>
-    <div class="layui-container">
-        <div class="layui-row">
-            <div class="layui-col-md12">
-                <h2 class="layui-header">长春市教育信息资讯库维护</h2>
-            </div>
+<div class="layui-container">
+    <div class="layui-row">
+        <div class="layui-col-md12">
+            <h2 class="layui-header">长春市教育信息资讯库维护</h2>
         </div>
-        
-        <div class="layui-tab layui-tab-brief">
-            <ul class="layui-tab-title">
-                <li class="layui-this">信息来源</li>
-                <li>文章列表</li>
-            </ul>
-            <div class="layui-tab-content">
-                <div class="layui-tab-item layui-show">
-                    <table id="sourceTable" lay-filter="sourceTable"></table>
-                </div>
-                <div class="layui-tab-item">
-                    <table id="articleTable" lay-filter="articleTable"></table>
-                </div>
+    </div>
+
+    <div class="layui-tab layui-tab-brief">
+        <ul class="layui-tab-title">
+            <li class="layui-this">文章列表</li>
+            <li>信息来源</li>
+        </ul>
+        <div class="layui-tab-content">
+            <div class="layui-tab-item layui-show">
+                <table id="articleTable" lay-filter="articleTable"></table>
+            </div>
+            <div class="layui-tab-item">
+                <table id="sourceTable" lay-filter="sourceTable"></table>
             </div>
         </div>
     </div>
+</div>
+
+<script src="layui/layui.js"></script>
+<script>
+    layui.use(['table', 'element'], function () {
+        var table = layui.table;
+        var element = layui.element;
+
+        // Information-source table
+        table.render({
+            elem: '#sourceTable',
+            url: '/api/sources',
+            page: true,
+            parseData: function (res) { // map the API envelope onto layui's code/msg/count/data
+                var payload = res.data || {}; // error replies ({code: 1, msg}) carry no data
+                return {
+                    "code": res.code, "msg": res.msg,
+                    "count": payload.total || 0,
+                    "data": payload.list || []
+                };
+            },
+            cols: [[
+                {field: 'id', title: '序号', width: 80},
+                {field: 'name', title: '账号名称'},
+                {field: 'type', title: '来源类型'},
+                {field: 'update_time', title: '更新时间'}
+            ]]
+        });
 
-    <script src="layui/layui.js"></script>
-    <script>
-        layui.use(['table', 'element'], function(){
-            var table = layui.table;
-            var element = layui.element;
-            
-            // 信息来源表格
-            table.render({
-                elem: '#sourceTable',
-                url: '/api/sources',
-                page: true,
-                parseData: function(res) { // 新增parseData函数处理返回数据
-                    return {
-                        "code": res.code,
-                        "msg": res.msg,
-                        "count": res.data.total,
-                        "data": res.data.list
-                    };
-                },
-                cols: [[
-                    {field: 'id', title: 'ID', width:80},
-                    {field: 'name', title: '账号名称'},
-                    {field: 'type', title: '来源类型'},
-                    {field: 'update_time', title: '更新时间'}
-                ]]
-            });
-            
-            // 文章列表表格
-            table.render({
-                elem: '#articleTable',
-                url: '/api/articles',
-                page: true,
-                parseData: function(res) {
-                    return {
-                        "code": res.code,
-                        "msg": res.msg,
-                        "count": res.data.total,
-                        "data": res.data.list
-                    };
-                },
-                cols: [[
-                    {field: 'id', title: 'ID', width:80},
-                    {field: 'title', title: '标题'},
-                    {field: 'source', title: '来源'},
-                    {field: 'publish_date', title: '发布日期'},
-                    {field: 'collect_time', title: '采集时间'}
-                ]]
-            });
+        // Article list table
+        table.render({
+            elem: '#articleTable',
+            url: '/api/articles',
+            page: true,
+            // Map the API envelope onto the code/msg/count/data shape layui expects
+            parseData: function (res) {
+                var payload = res.data || {}; // error replies ({code: 1, msg}) carry no data
+                return {"code": res.code, "msg": res.msg,
+                        "count": payload.total || 0, "data": payload.list || []};
+            },
+            cols: [[
+                {field: 'id', title: '序号', width: 80},
+                {field: 'title', title: '标题', templet: function(d){
+                    // title/url are stored article data served by /api/articles: escape them before building HTML
+                    var esc = function (s) { return String(s == null ? '' : s).replace(/[&<>"']/g, function (c) { return '&#' + c.charCodeAt(0) + ';'; }); };
+                    return '<a href="' + esc(d.url) + '" target="_blank" rel="noopener noreferrer" style="color: #1E88E5; text-decoration: underline; cursor: pointer;">' + esc(d.title) + '</a>';
+                }},
+                {field: 'source', title: '来源', width: 150},
+                {field: 'publish_date', title: '发布日期', width: 120},
+                {field: 'collect_time', title: '采集时间', width: 150}
+            ]]
         });
-    </script>
+    });
+</script>
 </body>
 </html>
\ No newline at end of file