You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

110 lines
4.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import asyncio
from lightrag.kg.shared_storage import initialize_pipeline_status
from raganything import RAGAnything
from lightrag import LightRAG
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
import os
def _build_vision_messages(prompt, system_prompt, image_data):
    """Build the chat ``messages`` payload for one text-plus-image request.

    Args:
        prompt: Text of the user turn.
        system_prompt: Optional system instruction; left out of the payload
            when falsy.
        image_data: Image content interpolated into a
            ``data:image/jpeg;base64,...`` URL (presumably already
            base64-encoded by the caller -- confirm against RAGAnything).

    Returns:
        A list of message dicts: an optional system message followed by a
        single user message carrying the text part and the image part.
    """
    messages = []
    # Only add the system message when there is one; a ``None`` entry in the
    # list is not a valid chat message.
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append(
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{image_data}"
                    },
                },
            ],
        }
    )
    return messages


async def load_existing_lightrag():
    """Load (or create) a LightRAG store, wrap it in RAGAnything and run a query.

    Steps:
        1. Report whether ``./rag_storage`` already holds data.
        2. Build a ``LightRAG`` instance on that directory with a DeepSeek
           chat model and a SiliconFlow-hosted ``BAAI/bge-m3`` embedder.
        3. Initialize storages and the pipeline status.
        4. Wrap the instance in ``RAGAnything`` with a vision-capable model
           function.
        5. Run one hybrid-mode query and print the answer.

    Environment:
        ``DEEPSEEK_API_KEY`` and ``SILICONFLOW_API_KEY`` override the API
        keys when set.

    Returns:
        None. The query result is printed to stdout.
    """
    # --- API configuration -------------------------------------------------
    # SECURITY: the literal fallbacks below are real credentials committed to
    # source. They are kept only so existing runs keep working; rotate them
    # and supply the keys through the environment instead.
    deepseek_api_key = os.environ.get(
        "DEEPSEEK_API_KEY", "sk-44ae895eeb614aa1a9c6460579e322f1"
    )
    deepseek_base_url = "https://api.deepseek.com"  # Optional
    siliconflow_api_key = os.environ.get(
        "SILICONFLOW_API_KEY",
        "sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
    )
    # NOTE(review): the OpenAI-compatible client appends the
    # ``/chat/completions`` path itself, so the base URL stops at ``/v1`` for
    # both embedding and vision calls -- confirm against the installed
    # lightrag version.
    siliconflow_base_url = "https://api.siliconflow.cn/v1"

    # --- Locate the LightRAG working directory -----------------------------
    lightrag_working_dir = "./rag_storage"
    # ``isdir`` (rather than ``exists``) keeps ``listdir`` from raising when
    # the path happens to be a regular file.
    if os.path.isdir(lightrag_working_dir) and os.listdir(lightrag_working_dir):
        print("✅ 发现已存在的 LightRAG 实例,正在加载...")
    else:
        print("❌ 未找到已存在的 LightRAG 实例,将创建新实例")

    def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        """Forward a text-only completion request to the DeepSeek chat model."""
        return openai_complete_if_cache(
            "deepseek-chat",
            prompt,
            system_prompt=system_prompt,
            # Fresh list per call instead of a shared mutable default.
            history_messages=[] if history_messages is None else history_messages,
            api_key=deepseek_api_key,
            base_url=deepseek_base_url,
            **kwargs,
        )

    def embed_texts(texts):
        """Embed ``texts`` with the ``BAAI/bge-m3`` model on SiliconFlow."""
        return openai_embed(
            texts,
            model="BAAI/bge-m3",
            api_key=siliconflow_api_key,
            base_url=siliconflow_base_url,
        )

    # --- Create / load the LightRAG instance -------------------------------
    lightrag_instance = LightRAG(
        working_dir=lightrag_working_dir,
        llm_model_func=llm_model_func,
        embedding_func=EmbeddingFunc(
            embedding_dim=1024,
            max_token_size=8192,
            func=embed_texts,
        ),
    )

    # Initialize storage (per the original note, this loads existing data
    # when there is any).
    await lightrag_instance.initialize_storages()
    await initialize_pipeline_status()

    def vision_model_func(
        prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
    ):
        """Answer with the vision model when an image is supplied.

        Without ``image_data`` the request is delegated to the LightRAG
        instance's text model function.
        """
        if history_messages is None:
            history_messages = []
        if not image_data:
            return lightrag_instance.llm_model_func(
                prompt, system_prompt, history_messages, **kwargs
            )
        # The prompt, system prompt and history slots are deliberately blank:
        # the whole conversation travels in the explicit ``messages`` payload.
        return openai_complete_if_cache(
            "GLM-4.1V-9B-Thinking",
            "",
            system_prompt=None,
            history_messages=[],
            messages=_build_vision_messages(prompt, system_prompt, image_data),
            api_key=siliconflow_api_key,
            base_url=siliconflow_base_url,
            **kwargs,
        )

    # --- Wrap the existing LightRAG instance in RAGAnything ----------------
    rag = RAGAnything(
        lightrag=lightrag_instance,  # pass in the existing LightRAG instance
        vision_model_func=vision_model_func,
        # Note: working_dir, llm_model_func, embedding_func, etc. are all
        # inherited from lightrag_instance.
    )

    # --- Query the existing knowledge base ---------------------------------
    result = await rag.aquery(
        "平台安全的保证方法有哪些?",
        mode="hybrid",
    )
    print("查询结果:", result)

    # To add a new multimodal document to the existing LightRAG instance:
    # await rag.process_document_complete(
    #     file_path="path/to/new/multimodal_document.pdf",
    #     output_dir="./output"
    # )
if __name__ == "__main__":
    # Script entry point: run the coroutine to completion on a new event loop.
    asyncio.run(load_existing_lightrag())