From 69e8e833e6c6299d75cb0312118cb5b0f4782dac Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Mon, 14 Jul 2025 15:41:30 +0800
Subject: [PATCH] TestCrawl: spoof WeChat User-Agent and save crawled articles to Test/Logs

---
 .../Test/Test/Logs/article_bfc50bb7d7.html    | 162 ++++++++++++++++++
 dsLightRag/Test/TestCrawl.py                  | 148 ++++++++++++++--
 2 files changed, 295 insertions(+), 15 deletions(-)
 create mode 100644 dsLightRag/Test/Test/Logs/article_bfc50bb7d7.html
diff --git a/dsLightRag/Test/Test/Logs/article_bfc50bb7d7.html b/dsLightRag/Test/Test/Logs/article_bfc50bb7d7.html
new file mode 100644
index 00000000..cd460649
--- /dev/null
+++ b/dsLightRag/Test/Test/Logs/article_bfc50bb7d7.html
@@ -0,0 +1,162 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta http-equiv=Content-Type content="text/html;charset=utf-8">
+<meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1.0,user-scalable=0,viewport-fit=cover">
+<meta name="color-scheme" content="light dark">
+<meta name="apple-mobile-web-app-capable" content="yes">
+<meta name="apple-mobile-web-app-status-bar-style" content="black">
+<meta name="format-detection" content="telephone=no">
+<link rel="shortcut icon" type="image/x-icon" href="//res.wx.qq.com/a/wx_fed/assets/res/NTI4MWU5.ico">
+<link rel="mask-icon" href="//res.wx.qq.com/a/wx_fed/assets/res/MjliNWVm.svg" color="#4C4C4C">
+<link rel="apple-touch-icon-precomposed" href="//res.wx.qq.com/a/wx_fed/assets/res/OTE0YTAw.png">
+
+        <title></title>
+        <script>
+        (() => {
+            
+            const ua = navigator.userAgent;
+            const noMobile = !(/(iPhone|iPad|iPod|iOS)/i.test(ua) || /Windows\sPhone/i.test(ua) || /(Android)/i.test(ua));
+            setTimeout(() => {
+                noMobile && document.title === '' && (document.title = '微信公众平台');
+            }, 1000);
+        })();
+        </script>
+        
+<link rel="stylesheet" type="text/css" href="//res.wx.qq.com/t/wx_fed/weui-source/res/2.5.14/weui.min.css">
+
+<link rel="stylesheet" href="//res.wx.qq.com/mmbizwap/zh_CN/htmledition/style/page/secitptpage/verify7729a9.css" media="all">
+<script type="text/javascript">
+  
+</script>
+
+    </head>
+    <body class="zh_CN " ontouchstart="">
+         
+<div class="weui-msg">
+  <div id="tips" style="display:none;" class="top_tips warning"></div>
+      </div>
+
+<script type="text/javascript">
+var PAGE_MID='mmbizwap:secitptpage/verify.html';
+</script>
+        
+        <style>body,.wx-root{--weui-BG-0:#ededed;--weui-BG-1:#f7f7f7;--weui-BG-2:#fff;--weui-BG-3:#f7f7f7;--weui-BG-4:#4c4c4c;--weui-BG-5:#fff;--weui-FG-0:rgba(0,0,0,0.9);--weui-FG-HALF:rgba(0,0,0,0.9);--weui-FG-1:rgba(0,0,0,0.5);--weui-FG-2:rgba(0,0,0,0.3);--weui-FG-3:rgba(0,0,0,0.1);--weui-FG-4:rgba(0,0,0,0.15);--weui-FG-5:rgba(0,0,0,0.05);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#fa9d3b;--weui-YELLOW:#ffc300;--weui-GREEN:#91d300;--weui-LIGHTGREEN:#95ec69;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1485ee;--weui-PURPLE:#6467f0;--weui-WHITE:#fff;--weui-LINK:#576b95;--weui-TEXTGREEN:#06ae56;--weui-FG:#000;--weui-BG:#fff;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:#fa9d3b;--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:#06ae56;--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:#10aeff;--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(0,0,0,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(0,0,0,0.05)}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-BG-0:#111;--weui-BG-1:#1e1e1e;--weui-BG-2:#191919;--weui-BG-3:#202020;--weui-BG-4:#404040;--weui-BG-5:#2c2c2c;--weui-FG-0:rgba(255,255,255,0.8);--weui-FG-HALF:rgba(255,255,255,0.6);--weui-FG-1:rgba(255,255,255,0.5);--weui-FG-2:rgba(255,255,255,0.3);--weui-FG-3:rgba(255,255,255,0.1);--weui-FG-4:rgba(255,255,255,0.15);--weui-FG-5:rgba(255,255,255,0.1);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#c87d2f;--weui-YELLOW:#cc9c00;--weui-GREEN:#74a800;--weui-LIGHTGREEN:#3eb575;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1196ff;--weui-PURPLE:#8183ff;--weui-WHITE:rgba(255,255,255,0.8);--weui-LINK:#7d90a9;--weui-TEXTGREEN:#259c5c;--weui-FG:#fff;--weui-BG:#000;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:rgba(250,157,59,0.6);--weui-TA
G-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:rgba(6,174,86,0.6);--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:rgba(16,174,255,0.6);--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(255,255,255,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(255,255,255,0.05)}}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-BG-0:#111;--weui-BG-1:#1e1e1e;--weui-BG-2:#191919;--weui-BG-3:#202020;--weui-BG-4:#404040;--weui-BG-5:#2c2c2c;--weui-FG-0:rgba(255,255,255,0.8);--weui-FG-HALF:rgba(255,255,255,0.6);--weui-FG-1:rgba(255,255,255,0.5);--weui-FG-2:rgba(255,255,255,0.3);--weui-FG-3:rgba(255,255,255,0.1);--weui-FG-4:rgba(255,255,255,0.15);--weui-FG-5:rgba(255,255,255,0.1);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#c87d2f;--weui-YELLOW:#cc9c00;--weui-GREEN:#74a800;--weui-LIGHTGREEN:#3eb575;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1196ff;--weui-PURPLE:#8183ff;--weui-WHITE:rgba(255,255,255,0.8);--weui-LINK:#7d90a9;--weui-TEXTGREEN:#259c5c;--weui-FG:#fff;--weui-BG:#000;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:rgba(250,157,59,0.6);--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:rgba(6,174,86,0.6);--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:rgba(16,174,255,0.6);--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(255,255,255,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(255,255,255,0.05)}.wx-root[data-weui-mode='care'],body[data-weui-mode='care']{--weui-BG-0:#ededed;--weui-BG-1:#f7f7f7;--weui-BG-2:#fff;--weui-BG-3:#f7f7f7;--weui-BG-4:#4c4c4c;--weui-BG-5:#fff;--weui-FG-0:#000;--weui-FG-HALF:#000;--weui-FG-1:rgba(0,0,0,0.6);--weui-FG-2:rgba(0,0,0,0.42);--weui-FG-3:rgba(0,0,0,0.1);--weui-FG-4:rgba(0,0,0,0.15);--weui-FG-5:rgba(0,0,0,0.05);--weui-RED:#dc3636;--weui-REDORANGE:#ff6146;--weui-ORANGE:#e17719;--weui-YELLOW:#bb8e00;--weui-GREEN:#4f8400;--weui-LIGHTGREEN:
#2e8800;--weui-BRAND:#018942;--weui-BLUE:#007dbb;--weui-INDIGO:#0075e2;--weui-PURPLE:#6265f1;--weui-WHITE:#fff;--weui-LINK:#576b95;--weui-TEXTGREEN:#06ae56;--weui-FG:#000;--weui-BG:#fff;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:#e17719;--weui-TAG-BACKGROUND-ORANGE:rgba(225,119,25,0.1);--weui-TAG-TEXT-GREEN:#06ae56;--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:#007dbb;--weui-TAG-BACKGROUND-BLUE:rgba(0,125,187,0.1);--weui-TAG-TEXT-BLACK:rgba(0,0,0,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(0,0,0,0.05)}@media(prefers-color-scheme:dark){.wx-root[data-weui-mode='care']:not([data-weui-theme='light']),body[data-weui-mode='care']:not([data-weui-theme='light']){--weui-BG-0:#111;--weui-BG-1:#1e1e1e;--weui-BG-2:#191919;--weui-BG-3:#202020;--weui-BG-4:#404040;--weui-BG-5:#2c2c2c;--weui-FG-0:rgba(255,255,255,0.85);--weui-FG-HALF:rgba(255,255,255,0.65);--weui-FG-1:rgba(255,255,255,0.55);--weui-FG-2:rgba(255,255,255,0.35);--weui-FG-3:rgba(255,255,255,0.1);--weui-FG-4:rgba(255,255,255,0.15);--weui-FG-5:rgba(255,255,255,0.1);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#c87d2f;--weui-YELLOW:#cc9c00;--weui-GREEN:#74a800;--weui-LIGHTGREEN:#3eb575;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1196ff;--weui-PURPLE:#8183ff;--weui-WHITE:rgba(255,255,255,0.8);--weui-LINK:#7d90a9;--weui-TEXTGREEN:#259c5c;--weui-FG:#fff;--weui-BG:#000;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:rgba(250,157,59,0.6);--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:rgba(6,174,86,0.6);--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:rgba(16,174,255,0.6);--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(255,255,255,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(255,255,255,0.05)}}.wx-root[data-weui-mode='care'][data-weui-theme='dark'],body[data-weui-mode='care'][data-weui-theme='dark']{
--weui-BG-0:#111;--weui-BG-1:#1e1e1e;--weui-BG-2:#191919;--weui-BG-3:#202020;--weui-BG-4:#404040;--weui-BG-5:#2c2c2c;--weui-FG-0:rgba(255,255,255,0.85);--weui-FG-HALF:rgba(255,255,255,0.65);--weui-FG-1:rgba(255,255,255,0.55);--weui-FG-2:rgba(255,255,255,0.35);--weui-FG-3:rgba(255,255,255,0.1);--weui-FG-4:rgba(255,255,255,0.15);--weui-FG-5:rgba(255,255,255,0.1);--weui-RED:#fa5151;--weui-REDORANGE:#ff6146;--weui-ORANGE:#c87d2f;--weui-YELLOW:#cc9c00;--weui-GREEN:#74a800;--weui-LIGHTGREEN:#3eb575;--weui-BRAND:#07c160;--weui-BLUE:#10aeff;--weui-INDIGO:#1196ff;--weui-PURPLE:#8183ff;--weui-WHITE:rgba(255,255,255,0.8);--weui-LINK:#7d90a9;--weui-TEXTGREEN:#259c5c;--weui-FG:#fff;--weui-BG:#000;--weui-TAG-TEXT-RED:rgba(250,81,81,0.6);--weui-TAG-BACKGROUND-RED:rgba(250,81,81,0.1);--weui-TAG-TEXT-ORANGE:rgba(250,157,59,0.6);--weui-TAG-BACKGROUND-ORANGE:rgba(250,157,59,0.1);--weui-TAG-TEXT-GREEN:rgba(6,174,86,0.6);--weui-TAG-BACKGROUND-GREEN:rgba(6,174,86,0.1);--weui-TAG-TEXT-BLUE:rgba(16,174,255,0.6);--weui-TAG-BACKGROUND-BLUE:rgba(16,174,255,0.1);--weui-TAG-TEXT-BLACK:rgba(255,255,255,0.5);--weui-TAG-BACKGROUND-BLACK:rgba(255,255,255,0.05)}.wx-root,body{--weui-BG-COLOR-ACTIVE:#ececec}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-BG-COLOR-ACTIVE:#373737}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-BG-COLOR-ACTIVE:#373737}}body,.wx-root,page{--weui-BTN-HEIGHT:48;--weui-BTN-HEIGHT-MEDIUM:40;--weui-BTN-HEIGHT-SMALL:32}.wx-root,body{--weui-BTN-ACTIVE-MASK:rgba(0,0,0,0.1)}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-BTN-ACTIVE-MASK:rgba(255,255,255,0.1)}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-BTN-ACTIVE-MASK:rgba(255,255,255,0.1)}}.wx-root,body{--weui-BTN-DEFAULT-ACTIVE-BG:#e6e6e6}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-BTN-DEFAULT-ACTIVE-BG:rgba(255,255,255,0.12
6)}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-BTN-DEFAULT-ACTIVE-BG:rgba(255,255,255,0.126)}}.wx-root,body{--weui-DIALOG-LINE-COLOR:rgba(0,0,0,0.1)}.wx-root[data-weui-theme='dark'],body[data-weui-theme='dark']{--weui-DIALOG-LINE-COLOR:rgba(255,255,255,0.1)}@media(prefers-color-scheme:dark){.wx-root:not([data-weui-theme='light']),body:not([data-weui-theme='light']){--weui-DIALOG-LINE-COLOR:rgba(255,255,255,0.1)}}.weui-hidden_abs{opacity:0;position:absolute;width:1px;height:1px;overflow:hidden}.weui-a11y_ref{display:none}.weui-hidden-space:empty:before{content:"\00A0";position:absolute;width:1px;height:1px;overflow:hidden}.weui-a11y-combo{position:relative}.weui-a11y-combo__helper{opacity:0;position:absolute;width:100%;height:100%;overflow:hidden}.weui-a11y-combo__content{position:relative;z-index:1}.weui-wa-hotarea-el{position:absolute;top:50%;left:50%;-webkit-transform:translate(-50%,-50%);transform:translate(-50%,-50%);min-width:44px;min-height:44px;width:100%;height:100%}.weui-wa-hotarea-el__wrp,.weui-wa-hotarea,.weui-wa-hotarea_before{position:relative}.weui-wa-hotarea-el__wrp a,.weui-wa-hotarea a,.weui-wa-hotarea_before a,.weui-wa-hotarea-el__wrp button,.weui-wa-hotarea button,.weui-wa-hotarea_before button,.weui-wa-hotarea-el__wrp navigator,.weui-wa-hotarea navigator,.weui-wa-hotarea_before navigator{position:relative;z-index:1}.weui-wa-hotarea_before:before,.weui-wa-hotarea:after{content:"";pointer-events:auto;position:absolute;top:50%;left:50%;-webkit-transform:translate(-50%,-50%);transform:translate(-50%,-50%);min-width:44px;min-height:44px;width:100%;height:100%}.test{color:red}body .weui-hidden_abs,body .aria_hidden_abs{position:absolute;opacity:0;overflow:hidden;left:auto;width:1px;height:1px}body .weui-a11y_ref{display:none}body .weui-wa-hotarea_before:before,body .weui-wa-hotarea:after{pointer-events:auto}body .weui-wa-hotarea-el__wrp button{position:relative;z-index:1}</style>
+<script nonce="" type="text/javascript" src="//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/lib/wa-helper.min7729a9.js"></script>
+<script nonce="" type="text/javascript">
+  
+</script>
+
+
+
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_colon">：</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_comma">，</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_period">。</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_space">&nbsp;</span>
+
+
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_type_video">视频</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_type_weapp">小程序</span>
+
+
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_zan_btn_txt">赞</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_zan_btn_tips">，轻点两下取消赞</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_like_btn_txt">在看</span>
+<span aria-hidden="true" class="weui-a11y_ref" style="display:none" id="js_a11y_like_btn_tips">，轻点两下取消在看</span>
+
+        <script nonce="">
+	var __DEBUGINFO = {
+		debug_js: "//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/debug/console7729a9.js",
+		safe_js: "//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/safe/moonsafe7729a9.js",
+		res_list: []
+	};
+</script>
+
+<script nonce="" type="text/javascript">
+	(function () {
+		var totalCount = 0,
+			finishCount = 0;
+
+		function _loadVConsolePlugin() {
+			window.vConsole = new window.VConsole();
+			while (window.vConsolePlugins.length > 0) {
+				var p = window.vConsolePlugins.shift();
+				window.vConsole.addPlugin(p);
+			}
+			
+			if (!window.vConsole.isInited) {
+				window.vConsole._render();
+				window.vConsole._mockTap();
+				window.vConsole._bindEvent();
+				window.vConsole._autoRun();
+			}
+		}
+
+		function _addScript(uri, cb) {
+			totalCount++;
+			var node = document.createElement('SCRIPT');
+			node.type = 'text/javascript';
+			node.src = uri;
+			node.setAttribute('nonce', '');
+			if (cb) {
+				node.onload = cb;
+			}
+			document.getElementsByTagName('head')[0].appendChild(node);
+		}
+		if (
+			(document.cookie && document.cookie.indexOf('vconsole_open=1') > -1)
+			|| location.href.indexOf('vconsole=1') > -1
+		) {
+			window.vConsolePlugins = [];
+			_addScript('//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/vconsole/3.2.2/vconsole.min7729a7.js', function () {
+				
+				_addScript('//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/vconsole/plugin/vconsole-mpopt/1.0.1/vconsole-mpopt7729a7.js', _loadVConsolePlugin);
+			});
+		}
+		if (document.cookie && document.cookie.indexOf('__xweb_remote_debug_device_token__') > -1) {
+			_addScript('https://mp.weixin.qq.com/mmbizappmsg/zh_CN/htmledition/js/scripts/mprdev-0.2.5.js', function () {
+				_addScript('https://mp.weixin.qq.com/mmbizappmsg/zh_CN/htmledition/js/scripts/xwebrd-0.0.2.js');
+			});
+		}
+
+		
+		try {
+			var adIframeUrl = localStorage.getItem('__WXLS_ad_iframe_url');
+			if (window === top) {
+				if (adIframeUrl) {
+					if (navigator.userAgent.indexOf('iPhone') > -1) {
+						var img = new Image();
+						img.src = adIframeUrl;
+					} else {
+						var link = document.createElement('link');
+						link.rel = 'prefetch';
+						link.href = adIframeUrl;
+						document.getElementsByTagName('head')[0].appendChild(link);
+					}
+				}
+			}
+		} catch (err) {
+
+		}
+
+	})();
+</script>
+        <script>window.__moon_host = 'res.wx.qq.com';window.moon_map = {"biz_wap/utils/ajax_wx.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/utils/ajax_wx7729a9.js","biz_common/utils/respTypes.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_common/utils/respTypes7729a9.js","biz_common/utils/url/parse.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_common/utils/url/parse7729a9.js","biz_common/utils/string/html.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_common/utils/string/html7729a9.js","common/color/dark.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/common/color/dark7729a9.js","common/color/light.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/common/color/light7729a9.js","biz_wap/utils/mmversion.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/utils/mmversion7729a9.js","biz_wap/jsapi/mpapp_core.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/jsapi/mpapp_core7729a9.js","complain/tips.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/complain/tips7729a9.js","biz_common/dom/event.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_common/dom/event7729a9.js","biz_wap/utils/ajax.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/utils/ajax7729a9.js","common/color/background_color.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/common/color/background_color7729a9.js","biz_wap/jsapi/core.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/jsapi/core7729a9.js","secitptpage/template/verify.js":"//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/secitptpage/template/verify7729a9.js"};</script><script type="text/javascript">window.__wxgspeeds={}; window.__wxgspeeds.moonloadtime=+new Date()</script><script  type="text/javascript" src="//res.wx.qq.com/mmbizwap/zh_CN/htmledition/js/biz_wap/moon7729a9.js"></script>
+<script type="text/javascript" src="https://captcha.gtimg.com/TCaptcha.js"></script>
+<script type="text/javascript">
+window.cgiData = {
+    register_code : "0" * 1,
+    target_url : "https://mp.weixin.qq.com/s?__biz=MzA3MjQ1Mjg2MQ==\x26amp;mid=2651526157\x26amp;idx=1\x26amp;sn=bfc50bb7d76462dfe1dd412fceb73b97\x26amp;chksm=84e1ab9fb39622894678cdbb13c5262262a0469cb1cf4326ac1f13b66af9a2e0d562f182174a\x26amp;nwr_flag=1",
+    cap_appid : "2003810213",
+    cap_sid : "4508590775431679609",
+    poc_sid : "HM2zdGijbOCHWQ9Y4bEnUfRRdmr12wCGRZr2tawo",
+    timeout_ms : "30000" * 1,
+    poc_token : "HM2zdGijv176ir9lsab2gSXz7Gxxv32lrfdB-Bxd",
+}
+console.log('window.cgiData', window.cgiData);
+seajs.use('secitptpage/template/verify.js')
+</script>
+
+    </body>
+</html>
+
diff --git a/dsLightRag/Test/TestCrawl.py b/dsLightRag/Test/TestCrawl.py
index 7a9a4e6d..63d2eb14 100644
--- a/dsLightRag/Test/TestCrawl.py
+++ b/dsLightRag/Test/TestCrawl.py
@@ -7,20 +7,10 @@ import datetime
 import logging
 import random
 import re
+import os
 
 import requests
 
-# 1、安装Firefox软件【最新】
-# https://www.firefox.com.cn/download/#product-desktop-release
-
-# 2、下载geckodriver驱动【最新】
-# https://splinter-docs-zh-cn.readthedocs.io/zh/latest/drivers/firefox.html
-# https://github.com/mozilla/geckodriver/releases
-
-# 3、Python爬虫实战系列：微信公众号文章爬取的5种技术方案总结及代码示例！
-# 方案5：微信公众号后台引用链接方式爬取
-# https://blog.csdn.net/Python_trys/article/details/146506009
-
 """
 # 查看selenium版本
 pip show selenium
@@ -43,17 +33,25 @@ import json
 if __name__ == '__main__':
     # 定义一个空的字典，存放cookies内容
     cookies = {}
-    # 设置headers
+    # Request headers; the User-Agent imitates the WeChat built-in browser on Windows
     header = {
         "HOST": "mp.weixin.qq.com",
-        "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0"
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+        "Accept-Encoding": "gzip, deflate",  # no "br": requests/urllib3 only decodes brotli when an optional brotli package is installed
+        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.4",  # exactly one q-value per language range
+        "Connection": "keep-alive"
     }
     # 用webdriver启动谷歌浏览器
     logging.info("启动浏览器，打开微信公众号登录界面")
     options = Options()
-    options.add_argument('-headless')  # 无头参数
+    # options.add_argument('-headless')  # headless flag; may be commented out while debugging
+
+    # Make Chrome report the WeChat built-in browser User-Agent
+    options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)')
+
     service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
-    driver = webdriver.Chrome(service=service)
+    driver = webdriver.Chrome(service=service, options=options)
     # 打开微信公众号登录页面
     driver.get('https://mp.weixin.qq.com/')
     # 等待5秒钟
@@ -143,10 +141,130 @@ if __name__ == '__main__':
                 'publish_time': datetime.datetime.fromtimestamp(int(item.get("update_time"))).strftime('%Y-%m-%d %H:%M:%S'),
                 'collection_time': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
             }
+            print("new_article:", new_article)
             logging.info("new_article:", new_article)
             article_urls.append(item.get('link'))
             time.sleep(1)
 
+    # Save crawled articles in a Logs directory next to this script; a cwd-relative
+    # "./Test/Logs" lands in Test/Test/Logs when the script is started from the Test directory.
+    logs_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Logs")
+    os.makedirs(logs_dir, exist_ok=True)
+
     for article_url in article_urls:
         print("正在爬取文章：" + article_url)
+        try:
+            # Headers imitating the WeChat Android in-app browser (sent by selenium and by the requests fallback)
+            wechat_headers = {
+                "User-Agent": "Mozilla/5.0 (Linux; Android 10; MI 8 Build/QKQ1.190828.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/83.0.4103.101 Mobile Safari/537.36 XWEB/1768 MMWEBSDK/20210302 MMWEBID/6253 MicroMessenger/8.0.2.1860(0x28000234) Process/toolsmp WeChat/arm64 Weixin NetType/WIFI Language/zh_CN ABI/arm64",
+                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
+                "Accept-Encoding": "gzip, deflate",
+                "Accept-Language": "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
+                "X-Requested-With": "com.tencent.mm",
+                "Referer": "https://mp.weixin.qq.com/"
+            }
+
+            # Open the article in selenium with the extra headers applied through the DevTools protocol
+            driver.execute_cdp_cmd('Network.setExtraHTTPHeaders', {'headers': wechat_headers})
+            driver.get(article_url)
+            # Fixed wait to give the page time to finish loading
+            time.sleep(5)
+
+            # Detect the "please open this link in the WeChat client" block page
+            if "请在微信客户端中打开链接" in driver.page_source or "请在微信中打开此链接" in driver.page_source:
+                print(f"文章需要在微信中打开，尝试使用requests直接获取：{article_url}")
+                # Fall back to requests; a block page or a captcha page ("secitptpage/verify") is not an article
+                response = requests.get(article_url, headers=wechat_headers, cookies=cookies, timeout=30)
+                if "请在微信客户端中打开链接" in response.text or "请在微信中打开此链接" in response.text or "secitptpage/verify" in response.text:
+                    print(f"使用requests仍然无法获取，跳过此文章：{article_url}")
+                    continue
+                else:
+                    # Save the fetched HTML, named after the first 10 characters of the sn= query value
+                    filename = f"article_{article_url.split('sn=')[1][:10] if 'sn=' in article_url else 'unknown'}"
+                    save_path = f"{logs_dir}/{filename}.html"
+                    with open(save_path, "w", encoding="utf-8") as f:
+                        f.write(response.text)
+                    print(f"已保存文章HTML内容：{save_path}")
+                    continue
+
+            # Locate title and body by trying several selectors in order
+            try:
+                # Candidate XPath selectors for the title
+                title_selectors = [
+                    '//h1[@class="rich_media_title"]',
+                    '//h1[@id="activity-name"]',
+                    '//h2[@class="rich_media_title"]',
+                    '//div[@class="rich_media_content"]//h1',
+                    '//div[@id="js_article"]//h1'
+                ]
+
+                title = None
+                for selector in title_selectors:
+                    try:
+                        title_element = driver.find_element('xpath', selector)
+                        title = title_element.text.strip()
+                        if title:
+                            break
+                    except Exception:
+                        continue
+
+                if not title:
+                    # Every selector failed or was empty: fall back to the browser page title
+                    title = driver.title.replace(" - 微信公众号", "").strip()
+
+                # Candidate XPath selectors for the article body
+                content_selectors = [
+                    '//div[@class="rich_media_content"]',
+                    '//div[@id="js_content"]',
+                    '//div[@class="rich_media_wrp"]'
+                ]
+
+                content = None
+                for selector in content_selectors:
+                    try:
+                        content_element = driver.find_element('xpath', selector)
+                        content = content_element.text.strip()
+                        if content:
+                            break
+                    except Exception:
+                        continue
+
+                if not content:
+                    # No body text could be extracted: keep the raw page source instead
+                    content = "无法提取正文内容，保存页面源码：\n" + driver.page_source
+
+                # Build the file name from the title, replacing characters that are illegal in file names
+                if not title:
+                    title = "未知标题_" + article_url.split("sn=")[1][:10] if "sn=" in article_url else "未知标题"
+
+                filename = re.sub(r'[\\/:*?"<>|]', '_', title)
+
+                # Write title, link and body to a text file
+                save_path = f"{logs_dir}/{filename}.txt"
+                with open(save_path, "w", encoding="utf-8") as f:
+                    f.write(f"标题：{title}\n\n")
+                    f.write(f"链接：{article_url}\n\n")
+                    f.write(f"内容：\n{content}")
+
+                print(f"文章《{title}》保存成功：{save_path}")
+
+            except Exception as e:
+                print(f"提取文章内容失败：{str(e)}")
+                # Keep the page source so the failure can be analysed later
+                error_filename = "error_" + article_url.split("sn=")[1][:10] if "sn=" in article_url else "error_page"
+                error_path = f"{logs_dir}/{error_filename}.html"
+                with open(error_path, "w", encoding="utf-8") as f:
+                    f.write(driver.page_source)
+                print(f"已保存页面源码到：{error_path}")
+
+            # Random pause between articles so frequent requests do not get blocked
+            time.sleep(random.uniform(3, 7))
+
+        except Exception as e:
+            print(f"爬取文章失败：{article_url}，错误信息：{str(e)}")
+            continue
+
+    # Close the browser
+    driver.quit()
+    print("所有文章爬取完成！")