dsProject/dsRagAnything/T2_Query.py

import asyncio

from lightrag.kg.shared_storage import initialize_pipeline_status
from raganything import RAGAnything
from lightrag import LightRAG
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
import os

async def load_existing_lightrag():
    # 设置 API 配置
    api_key = "sk-44ae895eeb614aa1a9c6460579e322f1"
    base_url = "https://api.deepseek.com"  # Optional

    # 首先，创建或加载已存在的 LightRAG 实例
    lightrag_working_dir = "./rag_storage"

    # 检查是否存在之前的 LightRAG 实例
    if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
        print("✅ 发现已存在的 LightRAG 实例，正在加载...")
    else:
        print("❌ 未找到已存在的 LightRAG 实例，将创建新实例")

    # 使用您的配置创建/加载 LightRAG 实例
    lightrag_instance = LightRAG(
        working_dir=lightrag_working_dir,
        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
            "deepseek-chat",
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=api_key,
            base_url=base_url,
            **kwargs,
        ),
        embedding_func=EmbeddingFunc(
            embedding_dim=1024,
            max_token_size=8192,
            func=lambda texts: openai_embed(
                texts,
                model="BAAI/bge-m3",
                api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
                base_url="https://api.siliconflow.cn/v1",
            ),
        )
    )

    # 初始化存储（如果有现有数据，这将加载它们）
    await lightrag_instance.initialize_storages()
    await initialize_pipeline_status()

    # 定义视觉模型函数用于图像处理
    def vision_model_func(
        prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs
    ):
        if image_data:
            return openai_complete_if_cache(
                "GLM-4.1V-9B-Thinking",
                "",
                system_prompt=None,
                history_messages=[],
                messages=[
                    {"role": "system", "content": system_prompt}
                    if system_prompt
                    else None,
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/jpeg;base64,{image_data}"
                                },
                            },
                        ],
                    }
                    if image_data
                    else {"role": "user", "content": prompt},
                ],
                api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
                base_url="https://api.siliconflow.cn/v1/chat/completions",
                **kwargs,
            )
        else:
            return lightrag_instance.llm_model_func(prompt, system_prompt, history_messages, **kwargs)

    # 现在使用已存在的 LightRAG 实例初始化 RAGAnything
    rag = RAGAnything(
        lightrag=lightrag_instance,  # 传入已存在的 LightRAG 实例
        vision_model_func=vision_model_func,
        # 注意：working_dir、llm_model_func、embedding_func 等都从 lightrag_instance 继承
    )

    # 查询已存在的知识库
    result = await rag.aquery(
        #"黄琬乔的准考证信息告诉我一下？",
        "平台安全的保证方法有哪些？",
        mode="hybrid"
    )
    print("查询结果:", result)

    # 向已存在的 LightRAG 实例添加新的多模态文档
    # await rag.process_document_complete(
    #     file_path="path/to/new/multimodal_document.pdf",
    #     output_dir="./output"
    # )

if __name__ == "__main__":
    asyncio.run(load_existing_lightrag())