|
|
|
|
import asyncio
|
|
|
|
|
|
|
|
|
|
from lightrag.kg.shared_storage import initialize_pipeline_status
|
|
|
|
|
from raganything import RAGAnything
|
|
|
|
|
from lightrag import LightRAG
|
|
|
|
|
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
|
|
|
|
from lightrag.utils import EmbeddingFunc
|
|
|
|
|
import os
|
|
|
|
|
from Config.Config import *
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _build_vision_messages(prompt, system_prompt, image_data):
    """Build the chat ``messages`` list for one image-grounded request.

    Args:
        prompt: Text placed in the user turn next to the image.
        system_prompt: Optional system instruction. When falsy, no system
            message is emitted (instead of a ``None`` placeholder entry).
        image_data: Payload inserted verbatim into a
            ``data:image/jpeg;base64,...`` URL, so it is presumably a
            base64-encoded image string -- confirm against the caller.

    Returns:
        A list of message dicts: an optional system message followed by a
        single user message holding a text part and an image part.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append(
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{image_data}"
                    },
                },
            ],
        }
    )
    return messages


async def load_existing_lightrag():
    """Create or load a LightRAG instance, wrap it in RAGAnything and query it.

    Steps performed:
      1. Report whether ``./rag_storage`` already holds data.
      2. Construct a ``LightRAG`` instance on that directory with the
         ``deepseek-chat`` completion function and the configured embedding
         function.
      3. Initialize the storages and the pipeline status.
      4. Build a ``RAGAnything`` on top of the LightRAG instance, supplying a
         vision-capable model function.
      5. Run one hybrid-mode query and print the result.

    The API keys, base URLs and model names come from the module-level
    configuration constants (``LLM_*``, ``EMBED_*``, ``VISION_*``).
    """
    # First, create or load the already existing LightRAG instance.
    lightrag_working_dir = "./rag_storage"

    # Check whether a previous LightRAG instance exists. ``isdir`` (rather
    # than ``exists``) keeps ``listdir`` from raising when the path turns out
    # to be a regular file.
    if os.path.isdir(lightrag_working_dir) and os.listdir(lightrag_working_dir):
        print("✅ 发现已存在的 LightRAG 实例,正在加载...")
    else:
        print("❌ 未找到已存在的 LightRAG 实例,将创建新实例")

    def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        """Forward a text-only completion request to ``deepseek-chat``."""
        # A fresh list per call replaces the former shared mutable default.
        if history_messages is None:
            history_messages = []
        return openai_complete_if_cache(
            "deepseek-chat",
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=LLM_API_KEY,
            base_url=LLM_BASE_URL,
            **kwargs,
        )

    def embedding_func(texts):
        """Embed ``texts`` with the configured embedding endpoint."""
        return openai_embed(
            texts,
            model=EMBED_MODEL_NAME,
            api_key=EMBED_API_KEY,
            base_url=EMBED_BASE_URL,
        )

    # Create/load the LightRAG instance with the project configuration.
    lightrag_instance = LightRAG(
        working_dir=lightrag_working_dir,
        llm_model_func=llm_model_func,
        embedding_func=EmbeddingFunc(
            embedding_dim=1024,
            max_token_size=8192,
            func=embedding_func,
        ),
    )

    # Initialize the storages (if data already exists, this loads it).
    await lightrag_instance.initialize_storages()
    await initialize_pipeline_status()

    # Vision model function used for image processing.
    def vision_model_func(
        prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
    ):
        """Route image requests to the vision model, the rest to the text LLM."""
        if history_messages is None:
            history_messages = []
        if not image_data:
            # No image: fall back to the LightRAG instance's text model.
            return lightrag_instance.llm_model_func(
                prompt, system_prompt, history_messages, **kwargs
            )
        # The whole conversation is passed through ``messages``; the
        # positional prompt, system prompt and history are left empty.
        return openai_complete_if_cache(
            VISION_MODEL_NAME,
            "",
            system_prompt=None,
            history_messages=[],
            messages=_build_vision_messages(prompt, system_prompt, image_data),
            api_key=VISION_API_KEY,
            base_url=VISION_BASE_URL,
            **kwargs,
        )

    # Now initialize RAGAnything with the existing LightRAG instance.
    rag = RAGAnything(
        lightrag=lightrag_instance,  # pass in the existing LightRAG instance
        vision_model_func=vision_model_func,
        # Note: working_dir, llm_model_func, embedding_func, etc. are all
        # inherited from lightrag_instance.
    )

    # Query the existing knowledge base.
    # (An alternative sample query, asking for a candidate's exam admission
    # ticket details, was previously kept here as a commented-out line.)
    result = await rag.aquery(
        "平台安全的保证方法有哪些?",
        mode="hybrid",
    )
    print("查询结果:", result)

    # Add a new multimodal document to the existing LightRAG instance:
    # await rag.process_document_complete(
    #     file_path="path/to/new/multimodal_document.pdf",
    #     output_dir="./output"
    # )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: drive the async workflow to completion on a fresh
    # event loop.
    asyncio.run(load_existing_lightrag())
|