main
HuangHai 4 days ago
parent 208c8f8c28
commit aa805dddbb

@ -1,341 +0,0 @@
import json
import subprocess
import tempfile
import urllib
import uuid
from io import BytesIO
import fastapi
import uvicorn
from fastapi import FastAPI, HTTPException
from lightrag import QueryParam
from sse_starlette import EventSourceResponse
from starlette.responses import StreamingResponse
from starlette.staticfiles import StaticFiles
from Util.LightRagUtil import *
from Util.PostgreSQLUtil import init_postgres_pool
# Console logger for LightRAG with a timestamped record format.
logger = logging.getLogger('lightrag')
logger.setLevel(logging.INFO)
_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter(_fmt))
logger.addHandler(handler)
async def lifespan(app: FastAPI):
    """FastAPI lifespan hook; no startup/shutdown work is required yet."""
    yield
app = FastAPI(lifespan=lifespan)
# Serve static assets (images referenced in answers, generated HTML pages).
app.mount("/static", StaticFiles(directory="Static"), name="static")
@app.post("/api/rag")
async def rag(request: fastapi.Request):
    """Answer a question against a topic knowledge base, streamed as SSE.

    Request JSON: ``topic`` (knowledge-base name), ``query`` (the question),
    optional ``mode`` (LightRAG query mode, default ``hybrid``).
    Returns an ``EventSourceResponse`` emitting ``{'reply': chunk}`` events,
    or a single ``{'error': ...}`` event on failure.
    """
    data = await request.json()
    topic = data.get("topic")  # Chinese, Math
    mode = data.get("mode", "hybrid")  # query mode, defaults to hybrid
    # Per-topic knowledge-base working directory
    WORKING_PATH = "./Topic/" + topic
    # The question to answer
    query = data.get("query")
    # Prompt suffix: suppress the reference list, keep LaTeX and image
    # markup verbatim, admit when the answer is outside the knowledge base.
    user_prompt = "\n 1、不要输出参考资料 或者 References "
    user_prompt = user_prompt + "\n 2、资料中提供化学反应方程式的严格按提供的Latex公式输出绝不允许对Latex公式进行修改"
    user_prompt = user_prompt + "\n 3、如果资料中提供了图片的需要仔细检查图片下方描述文字是否与主题相关,不相关的不要提供!相关的一定要严格按照原文提供图片输出,不允许省略或不输出!"
    user_prompt = user_prompt + "\n 4、如果问题与提供的知识库内容不符则明确告诉未在知识库范围内提到"
    user_prompt = user_prompt + "\n 5、发现输出内容中包含Latex公式的一定要检查是不是包含了$$或$的包含符号,不能让Latex无包含符号出现"

    async def generate_response_stream(query: str):
        # Renamed from `rag` (which shadowed the endpoint) and initialized to
        # None: previously, if initialize_rag() raised, the finally block hit
        # an UnboundLocalError that masked the original exception.
        rag_instance = None
        try:
            rag_instance = await initialize_rag(WORKING_PATH)
            await rag_instance.initialize_storages()
            await initialize_pipeline_status()
            resp = await rag_instance.aquery(
                query=query,
                param=QueryParam(mode=mode, stream=True, user_prompt=user_prompt, enable_rerank=True))
            async for chunk in resp:
                if not chunk:
                    continue
                yield f"data: {json.dumps({'reply': chunk})}\n\n"
                print(chunk, end='', flush=True)
        except Exception as e:
            yield f"data: {json.dumps({'error': str(e)})}\n\n"
        finally:
            # Release storages only when initialization actually succeeded.
            if rag_instance is not None:
                await rag_instance.finalize_storages()

    return EventSourceResponse(generate_response_stream(query=query))
@app.post("/api/save-word")
async def save_to_word(request: fastapi.Request):
    """Convert posted Markdown to a .docx via pandoc and stream it back.

    Request JSON: ``markdown_content``. Raises HTTP 400 on bad input and
    HTTP 500 on conversion failure; temp files are removed either way.
    """
    # fix: temp_md was never initialized, so the finally block raised
    # NameError whenever request parsing failed before its assignment.
    temp_md = None
    output_file = None
    try:
        # Parse and validate the request body
        try:
            data = await request.json()
            markdown_content = data.get('markdown_content', '')
            if not markdown_content:
                raise ValueError("Empty MarkDown content")
        except Exception as e:
            logger.error(f"Request parsing failed: {str(e)}")
            raise HTTPException(status_code=400, detail=f"Invalid request: {str(e)}")
        # Write the markdown to a uniquely-named temp file
        temp_md = os.path.join(tempfile.gettempdir(), uuid.uuid4().hex + ".md")
        with open(temp_md, "w", encoding="utf-8") as f:
            f.write(markdown_content)
        # Convert with pandoc; image paths resolve against ./static
        output_file = os.path.join(tempfile.gettempdir(), "【理想大模型】问答.docx")
        subprocess.run(['pandoc', temp_md, '-o', output_file, '--resource-path=static'], check=True)
        # Load the .docx fully into memory so the temp file can be deleted
        with open(output_file, "rb") as f:
            stream = BytesIO(f.read())
        # RFC 5987 encoding so the non-ASCII filename survives the header
        encoded_filename = urllib.parse.quote("【理想大模型】问答.docx")
        return StreamingResponse(
            stream,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"})
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Unexpected error: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error")
    finally:
        # Best-effort cleanup of both temp artifacts
        try:
            if temp_md and os.path.exists(temp_md):
                os.remove(temp_md)
            if output_file and os.path.exists(output_file):
                os.remove(output_file)
        except Exception as e:
            logger.warning(f"Failed to clean up temp files: {str(e)}")
@app.get("/api/tree-data")
async def get_tree_data():
    """Return the knowledge-point tree as nested nodes for the front-end.

    Nodes with parent_id 0 (or NULL) are roots; others are attached to
    their parent's ``children`` list. Returns ``{"code": 0, "data": [...]}``
    or ``{"code": 1, "msg": ...}`` on error.
    """
    try:
        pg_pool = await init_postgres_pool()
        async with pg_pool.acquire() as conn:
            rows = await conn.fetch("""
                SELECT id,
                       title,
                       parent_id,
                       is_leaf,
                       prerequisite,
                       related
                FROM knowledge_points
                ORDER BY parent_id, id
            """)
            # Build an id -> node map first, then link children
            nodes = {}
            for row in rows:
                prerequisite_data = json.loads(row[4]) if row[4] else []
                if (isinstance(prerequisite_data, list) and prerequisite_data
                        and isinstance(prerequisite_data[0], dict)):
                    # Already the new [{"id":.., "title":..}] format
                    prerequisites = prerequisite_data
                else:
                    # Legacy (id, title) pairs -> new dict format.
                    # (loop vars renamed: `id` shadowed the builtin)
                    prerequisites = ([{"id": str(pid), "title": ptitle}
                                      for pid, ptitle in prerequisite_data]
                                     if prerequisite_data else None)
                # fix: `related` was json.loads()-parsed twice per row
                related_data = json.loads(row[5]) if row[5] else None
                nodes[row[0]] = {
                    "id": row[0],
                    "title": row[1],
                    "parent_id": row[2] if row[2] is not None else 0,
                    "isParent": not row[3],
                    "prerequisite": prerequisites if prerequisites else None,
                    "related": related_data if related_data else None,
                    "open": True
                }
            # Link each node under its parent; parent_id == 0 marks a root
            tree_data = []
            for node in nodes.values():
                parent_id = node["parent_id"]
                if parent_id == 0:
                    tree_data.append(node)
                elif parent_id in nodes:
                    nodes[parent_id].setdefault("children", []).append(node)
            return {"code": 0, "data": tree_data}
    except Exception as e:
        return {"code": 1, "msg": str(e)}
@app.post("/api/update-knowledge")
async def update_knowledge(request: fastapi.Request):
    """Replace a node's prerequisite or related knowledge list.

    Request JSON: ``node_id``, ``knowledge`` (list of {"id", "title"}),
    ``update_type`` ('prerequisite' (default) or anything else = related).
    """
    try:
        data = await request.json()
        node_id = data.get('node_id')
        knowledge = data.get('knowledge', [])
        update_type = data.get('update_type', 'prerequisite')  # default: prerequisites
        if not node_id:
            raise ValueError("Missing node_id")
        # The two branches previously duplicated the whole UPDATE. The
        # column name is chosen from a fixed whitelist, never from raw
        # client input, so the f-string below cannot inject SQL.
        column = 'prerequisite' if update_type == 'prerequisite' else 'related'
        payload = json.dumps(
            [{"id": p["id"], "title": p["title"]} for p in knowledge],
            ensure_ascii=False)
        pg_pool = await init_postgres_pool()
        async with pg_pool.acquire() as conn:
            await conn.execute(
                f"""
                UPDATE knowledge_points
                SET {column} = $1
                WHERE id = $2
                """,
                payload, node_id)
        return {"code": 0, "msg": "更新成功"}
    except Exception as e:
        logger.error(f"更新知识失败: {str(e)}")
        return {"code": 1, "msg": str(e)}
@app.post("/api/render_html")
async def render_html(request: fastapi.Request):
    """Persist model-generated HTML to a temp file and return its URL.

    Strips markdown code fences the model may wrap the HTML in, writes the
    content under ../static/temp, and returns the public /static URL.
    """
    data = await request.json()
    html_content = data.get('html_content')
    if not html_content:
        # fix: calling .replace() on a missing field crashed with
        # AttributeError; reject bad requests explicitly instead.
        raise HTTPException(status_code=400, detail="Missing html_content")
    # Remove ```html ... ``` fences
    html_content = html_content.replace("```html", "")
    html_content = html_content.replace("```", "")
    # Unique file name so concurrent requests never collide
    filename = f"relation_{uuid.uuid4().hex}.html"
    filepath = os.path.join('../static/temp', filename)
    # Make sure the temp directory exists
    os.makedirs('../static/temp', exist_ok=True)
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(html_content)
    return {
        'success': True,
        # fix: the URL previously did not interpolate the generated file
        # name, so clients received a dead link.
        'url': f'/static/temp/{filename}'
    }
@app.get("/api/sources")
async def get_sources(page: int = 1, limit: int = 10):
    """Paginated list of WeChat source accounts, newest first.

    NOTE(review): the SELECT yields (id, account_id, account_name,
    created_at) but the response maps row[1] -> "name" and row[2] ->
    "type" — confirm this matches what the front-end expects.
    """
    try:
        pg_pool = await init_postgres_pool()
        async with pg_pool.acquire() as conn:
            # Total row count for the pager
            total = await conn.fetchval("SELECT COUNT(*) FROM t_wechat_source")
            # Fetch one page
            offset = (page - 1) * limit
            rows = await conn.fetch(
                """
                SELECT id, account_id, account_name, created_at
                FROM t_wechat_source
                ORDER BY created_at DESC
                LIMIT $1
                OFFSET $2
                """,
                limit, offset
            )
            sources = []
            for row in rows:
                ts = row[3]
                sources.append({
                    "id": row[0],
                    "name": row[1],
                    "type": row[2],
                    "update_time": ts.strftime("%Y-%m-%d %H:%M:%S") if ts else None,
                })
            return {
                "code": 0,
                "data": {
                    "list": sources,
                    "total": total,
                    "page": page,
                    "limit": limit,
                },
            }
    except Exception as e:
        return {"code": 1, "msg": str(e)}
@app.get("/api/articles")
async def get_articles(page: int = 1, limit: int = 10):
    """Paginated list of collected WeChat articles, newest collection first."""
    try:
        pg_pool = await init_postgres_pool()
        async with pg_pool.acquire() as conn:
            # Total row count for the pager
            total = await conn.fetchval("SELECT COUNT(*) FROM t_wechat_articles")
            # Fetch one page
            offset = (page - 1) * limit
            rows = await conn.fetch(
                """
                SELECT a.id,
                       a.title,
                       a.source as name,
                       a.publish_time,
                       a.collection_time,
                       a.url
                FROM t_wechat_articles a
                ORDER BY a.collection_time DESC
                LIMIT $1
                OFFSET $2
                """,
                limit, offset
            )
            articles = []
            for row in rows:
                published, collected = row[3], row[4]
                articles.append({
                    "id": row[0],
                    "title": row[1],
                    "source": row[2],
                    "publish_date": published.strftime("%Y-%m-%d") if published else None,
                    "collect_time": collected.strftime("%Y-%m-%d %H:%M:%S") if collected else None,
                    "url": row[5],
                })
            return {
                "code": 0,
                "data": {
                    "list": articles,
                    "total": total,
                    "page": page,
                    "limit": limit,
                },
            }
    except Exception as e:
        return {"code": 1, "msg": str(e)}
if __name__ == "__main__":
    # Bind to all interfaces so the service is reachable from other hosts.
    uvicorn.run(app, host="0.0.0.0", port=8000)

@ -537,5 +537,18 @@
"create_time": 1752818371,
"update_time": 1752818371,
"_id": "hybrid:keywords:37a1721ae1b1e0a8858a85a94a139d9a"
},
"hybrid:keywords:6d5e4e5a7a264a947d2e6e9cb8befec7": {
"return": "{\"high_level_keywords\": [\"\\u51e0\\u4f55\\u8bc1\\u660e\", \"\\u4e09\\u89d2\\u5f62\", \"\\u5185\\u90e8\\u70b9\", \"\\u89d2\\u5ea6\\u5173\\u7cfb\"], \"low_level_keywords\": [\"\\u4e09\\u89d2\\u5f62ABC\", \"\\u70b9P\", \"\\u2220BPC\", \"\\u2220A\"]}",
"cache_type": "keywords",
"chunk_id": null,
"embedding": null,
"embedding_shape": null,
"embedding_min": null,
"embedding_max": null,
"original_prompt": "求证在三角形ABC中P为其内部任意一点。请证明∠BPC > ∠A。",
"create_time": 1752821457,
"update_time": 1752821457,
"_id": "hybrid:keywords:6d5e4e5a7a264a947d2e6e9cb8befec7"
}
}

@ -1,78 +0,0 @@
# 详解Python + Selenium 批量采集微信公众号搭建自己的微信公众号每日AI简报告别信息焦虑
# https://blog.csdn.net/k352733625/article/details/149222945
# 微信爬爬猫---公众号文章抓取代码分析
# https://blog.csdn.net/yajuanpi4899/article/details/121584268
import json
import logging
from torch.distributed.elastic.timer import expires
"""
# 查看selenium版本
pip show selenium
4.34.2
# 查看Chrome浏览器版本
chrome://version/
138.0.7204.101 (正式版本) 64
# 下载驱动包
https://googlechromelabs.github.io/chrome-for-testing/
https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
"""
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service as ChromeService
if __name__ == '__main__':
    # fix: the root logger defaults to WARNING, so every logging.info()
    # below was silently discarded; configure it first.
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')
    # Cookies harvested from the logged-in session
    cookies = {}
    # Headers mimicking the WeChat built-in browser
    header = {
        "HOST": "mp.weixin.qq.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36 QBCore/4.0.1301.400 QQBrowser/9.0.2524.400 Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2875.116 Safari/537.36 NetType/WIFI MicroMessenger/7.0.20.1781(0x6700143B) WindowsWechat(0x63010200)",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.6,en;q=0.5;q=0.4",
        "Connection": "keep-alive"
    }
    # Launch Chrome and open the WeChat official-account login page
    logging.info("启动浏览器,打开微信公众号登录界面")
    options = Options()
    service = ChromeService(executable_path=r"C:\Windows\System32\chromedriver.exe")
    driver = webdriver.Chrome(service=service, options=options)
    try:
        driver.get('https://mp.weixin.qq.com/')
        # Give the page a moment to load (fix: comment said 5s, code sleeps 2)
        time.sleep(2)
        # The user scans the QR code with their phone during this window
        logging.info("请拿手机扫码二维码登录公众号")
        time.sleep(20)
        # Reload: once logged in this returns the admin console, whose
        # cookies carry the authenticated session.
        driver.get('https://mp.weixin.qq.com/')
        cookie_items = driver.get_cookies()
        # Track the latest expiry timestamp across all cookies
        expiry = -1
        for cookie_item in cookie_items:
            cookies[cookie_item['name']] = cookie_item['value']
            if 'expiry' in cookie_item and cookie_item['expiry'] > expiry:
                expiry = cookie_item['expiry']
    finally:
        # fix: the browser was left running when login failed and the
        # script exit()ed before driver.quit(); always release it.
        driver.quit()
    # slave_sid is only present for an authenticated session
    if "slave_sid" not in cookies:
        logging.error("登录公众号失败获取cookie失败")
        raise SystemExit(1)
    # Persist the cookies (plus the expiry) as pretty-printed JSON
    cookies["expiry"] = expiry
    with open('cookies.txt', mode='w', encoding="utf-8") as f:
        f.write(json.dumps(cookies, indent=4, ensure_ascii=False))
    print("成功获取了cookies内容")

@ -1,251 +0,0 @@
# 详解Python + Selenium 批量采集微信公众号搭建自己的微信公众号每日AI简报告别信息焦虑
# https://blog.csdn.net/k352733625/article/details/149222945
# 微信爬爬猫---公众号文章抓取代码分析
# https://blog.csdn.net/yajuanpi4899/article/details/121584268
"""
# 查看selenium版本
pip show selenium
4.34.2
# 查看Chrome浏览器版本
chrome://version/
138.0.7204.101 (正式版本) 64
# 下载驱动包
https://googlechromelabs.github.io/chrome-for-testing/
https://storage.googleapis.com/chrome-for-testing-public/138.0.7204.94/win64/chromedriver-win64.zip
"""
import asyncio
import datetime
import json
import logging
import random
import re
import time
import requests
from selenium.webdriver.chrome.options import Options
from Util.LightRagUtil import initialize_pg_rag, initialize_rag
from Util.PostgreSQLUtil import init_postgres_pool
from Util.WxGzhUtil import init_wechat_browser, get_article_content
# 删除重复的日志配置,只保留以下内容
# Dedicated logger for the crawler.
logger = logging.getLogger('WxGzh')
logger.setLevel(logging.INFO)
# Only attach a handler once, so re-imports don't duplicate log lines.
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    logger.addHandler(handler)
# Request headers used for every call to the WeChat admin API.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
    'Referer': 'https://mp.weixin.qq.com/'
}
async def get_wechat_sources():
    """Fetch all configured official accounts from t_wechat_source.

    Returns a list of row dicts. The pool is always closed afterwards.
    """
    # fix: if init_postgres_pool() raised, `pool` was unbound and the
    # finally block itself crashed with NameError.
    pool = None
    try:
        pool = await init_postgres_pool()
        async with pool.acquire() as conn:
            rows = await conn.fetch('SELECT * FROM t_wechat_source')
            return [dict(row) for row in rows]
    finally:
        if pool is not None:
            await pool.close()
async def is_article_exist(pool, article_url):
    """Return True when the article URL is already stored in t_wechat_articles."""
    sql = '''
        SELECT 1
        FROM t_wechat_articles
        WHERE url = $1 LIMIT 1
    '''
    try:
        async with pool.acquire() as conn:
            found = await conn.fetchrow(sql, article_url)
        return found is not None
    except Exception as e:
        logging.error(f"检查文章存在性失败: {e}")
        # Default to "not present" on error so the pipeline keeps going
        return False
async def save_article_to_db(pool, article_title, account_name, article_url, publish_time, content, source_id):
    """Index one article into the LightRAG knowledge base, then persist it.

    Indexing failures are logged and do not prevent the DB insert.
    """
    WORKING_DIR = f"../Topic/ChangChun"
    docx_name = f"{account_name}_{article_title}"  # source + title as the document name
    logger.info(f"开始处理文档: {docx_name}")
    # fix: if initialize_rag() raised, `rag` was unbound and the finally
    # block crashed with UnboundLocalError, masking the original error.
    rag = None
    try:
        # NOTE: defaults to the NetworkX storage backend
        rag = await initialize_rag(WORKING_DIR)
        await rag.ainsert(content)
        logger.info(f"索引完成: {docx_name}")
    except Exception as e:
        # fix: was print(); route through the module logger like the rest
        logger.error(f"An error occurred: {e}")
    finally:
        if rag is not None:
            await rag.finalize_storages()
    try:
        async with pool.acquire() as conn:
            await conn.execute('''
                INSERT INTO t_wechat_articles
                    (title, source, url, publish_time, content, source_id)
                VALUES ($1, $2, $3, $4, $5, $6)
            ''', article_title, account_name, article_url,
                publish_time, content, source_id)
        logger.info(f"保存文档到知识库成功: {docx_name}")
    except Exception as e:
        # fix: was logging.error — use the configured module logger
        logger.error(f"保存文章失败: {e}")
async def initialize_wechat_session():
    """Load saved cookies, validate their expiry, and recover the API token.

    Starts the shared browser (bound to the module-global ``driver`` so
    main() can quit it) and follows the login redirect to extract the
    session token. Returns (cookies, token), or (None, None) when no
    token can be extracted.
    """
    with open('cookies.txt', 'r', encoding='utf-8') as f:
        content = f.read()
    cookies = json.loads(content)
    global driver  # main()'s finally block quits this instance
    expiry = cookies["expiry"]
    if expiry:
        current_timestamp = time.time()
        if current_timestamp > expiry:
            logger.error("Cookie已过期")
            # Abort the whole process: stale cookies cannot be recovered here
            raise SystemExit(1)
    del cookies["expiry"]
    # fix: an Options() object was built (with -headless) but never passed
    # to init_wechat_browser(), so it was dead code — removed.
    driver = init_wechat_browser()
    # Hitting the console root while authenticated redirects to a URL
    # whose query string carries the session token.
    url = 'https://mp.weixin.qq.com'
    response = requests.get(url=url, allow_redirects=False, cookies=cookies)
    if 'Location' in response.headers:
        redirect_url = response.headers.get("Location")
        token_match = re.findall(r'token=(\d+)', redirect_url)
        if token_match:
            return cookies, token_match[0]
    return None, None
async def get_wechat_account_list(cookies, token, account_name):
    """Look up an official account by name and return its fakeid."""
    endpoint = 'https://mp.weixin.qq.com/cgi-bin/searchbiz?'
    params = {
        'action': 'search_biz',
        'token': token,
        'lang': 'zh_CN',
        'f': 'json',
        'ajax': '1',
        'random': random.random(),
        'query': account_name,
        'begin': '0',
        'count': '5'
    }
    response = requests.get(endpoint, cookies=cookies, headers=header, params=params)
    # The first search hit is assumed to be the wanted account
    first_hit = response.json().get('list')[0]
    return first_hit.get('fakeid')
async def get_article_list(cookies, token, fakeid):
    """Fetch the first page (5 entries) of an account's article list."""
    endpoint = 'https://mp.weixin.qq.com/cgi-bin/appmsg?'
    params = {
        'token': token,
        'lang': 'zh_CN',
        'f': 'json',
        'ajax': '1',
        'random': random.random(),
        'action': 'list_ex',
        'begin': '0',
        'count': '5',
        'query': '',
        'fakeid': fakeid,
        'type': '9'
    }
    response = requests.get(endpoint, cookies=cookies, headers=header, params=params)
    return response.json().get('app_msg_list')
async def process_single_article(article_info, account_info, cookies, token):
    """Download and persist one article.

    Returns True only when the article was newly saved; duplicates,
    filtered titles, and errors all return False. (cookies/token are
    kept in the signature for call-site compatibility but are unused.)
    """
    article_url = article_info.get('link')
    article_title = article_info.get('title')
    publish_time = datetime.datetime.fromtimestamp(int(article_info.get("update_time")))
    # Skip exam-paper style posts
    if '试卷' in article_title:
        return False
    # fix: replaced the `'pool' in locals()` check in finally with an
    # explicit None guard — clearer and not dependent on locals() contents.
    pool = None
    try:
        pool = await init_postgres_pool()
        # Skip articles we already stored
        if await is_article_exist(pool, article_url):
            logger.info(f'文章已存在,跳过保存: {account_info["account_name"]}-{article_title}')
            return False
        content = get_article_content(article_url)
        await save_article_to_db(pool, article_title, account_info["account_name"],
                                 article_url, publish_time, content, account_info["id"])
        return True
    except Exception as e:
        logger.error(f"处理文章时出错: {e}")
        return False
    finally:
        if pool is not None:
            await pool.close()
async def process_wechat_account(account_info, cookies, token):
    """Crawl all listed articles of one account; returns the saved count."""
    cnt = 0
    fakeid = await get_wechat_account_list(cookies, token, account_info["account_name"])
    articles = await get_article_list(cookies, token, fakeid)
    for article in articles:
        success = await process_single_article(article, account_info, cookies, token)
        if success:
            cnt += 1
        # fix: time.sleep() blocked the event loop inside this coroutine;
        # the non-blocking asyncio.sleep keeps the loop responsive.
        await asyncio.sleep(1)
    logger.info(f"成功获取公众号: {account_info['account_name']} {cnt}篇文章。")
    return cnt
async def main():
    """Crawl every configured account in an endless 30-minute cycle."""
    while True:
        try:
            logger.info("开始执行微信公众号文章采集任务")
            cookies, token = await initialize_wechat_session()
            if not cookies or not token:
                logger.error("初始化微信会话失败")
                # fix: a bare `continue` here spun the loop with zero delay
                # (busy loop hammering WeChat); back off before retrying.
                await asyncio.sleep(30 * 60)
                continue
            account_list = await get_wechat_sources()
            for account in account_list:
                await process_wechat_account(account, cookies, token)
            logger.info("本次采集任务完成等待30分钟后再次执行")
            await asyncio.sleep(30 * 60)  # 30 minutes between rounds
        except Exception as e:
            logger.error(f"主循环发生错误: {e}")
            await asyncio.sleep(30 * 60)  # also wait 30 minutes after an error
        finally:
            # Release the browser started by initialize_wechat_session()
            if 'driver' in globals():
                driver.quit()
if __name__ == '__main__':
    # asyncio.run() creates, runs, and closes the event loop itself,
    # replacing the manual new_event_loop / set_event_loop / close dance.
    asyncio.run(main())

@ -1,17 +0,0 @@
{
"_clsk": "2gtve8|1752546228205|1|1|mp.weixin.qq.com/weheat-agent/payload/record",
"xid": "16332bed01be1055e236ad45b33af8df",
"data_bizuin": "3514353238",
"slave_user": "gh_4f88a4e194da",
"slave_sid": "QzBRX1FWTXNMaEdJYnc4ODBaM3FJU3RRbjVJNFE2N2IzMXFyVGlRQ0V5YklvNGFOc3NBWHdjV2J5OVg5U0JBVXdfdGhSU3lObXRheG1TdFUyXzVFcTFYS3E1NTh2aTlnSlBOOUluMUljUnBkYktjeUJDM216WVJNYzJKQkx2eW9Ib1duUk1yWXI3RndTa2dK",
"rand_info": "CAESIFwUSYus3XR5tFa1+b5ytJeuGAQS02d07zNBJNfi+Ftk",
"data_ticket": "9gQ088/vC7+jqxfFxBKS2aRx/JjmzJt+8HyuDLJtQBgpVej1hfSG1A0FQKWBbHQh",
"bizuin": "3514353238",
"mm_lang": "zh_CN",
"slave_bizuin": "3514353238",
"uuid": "8c5dc8e06af66d00a4b8e8596c8662eb",
"ua_id": "y1HZNMSzYCWuaUJDAAAAAApPVJ0a_arX_A5zqoUh6P8=",
"wxuin": "52546211515015",
"_clck": "msq32d|1|fxm|0",
"expiry": 1787106233
}
Loading…
Cancel
Save