main
HuangHai 2 weeks ago
parent b2335f4934
commit 6c20771fb7

@@ -1,19 +1,12 @@
 import asyncio
 import sys
 import loguru
 from raganything import RAGAnything, RAGAnythingConfig
-from lightrag.llm.openai import openai_complete_if_cache, openai_embed
-from lightrag.utils import EmbeddingFunc
 from Config.Config import *
+from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func
 async def main():
     # Configure logging at the start of main()
     loguru.logger.remove()  # remove the default handler
     loguru.logger.add(sys.stderr, level="INFO")  # log INFO and above to the console
     # Create RAGAnything configuration
     config = RAGAnythingConfig(
         working_dir="./rag_storage",
         mineru_parse_method="auto",
@@ -22,67 +15,10 @@ async def main():
         enable_equation_processing=True,
     )
-    # Define LLM model function
-    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
-        return openai_complete_if_cache(
-            LLM_MODEL_NAME,
-            prompt,
-            system_prompt=system_prompt,
-            history_messages=history_messages,
-            api_key=LLM_API_KEY,
-            base_url=LLM_BASE_URL,
-            **kwargs,
-        )
-    # Define vision model function for image processing
-    def vision_model_func(
-        prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs
-    ):
-        if image_data:
-            return openai_complete_if_cache(
-                VISION_MODEL_NAME,  # set to the model name of SiliconFlow's free vision model
-                "",
-                system_prompt=None,
-                history_messages=[],
-                messages=[
-                    {"role": "system", "content": system_prompt}
-                    if system_prompt
-                    else None,
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "text", "text": prompt},
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:image/jpeg;base64,{image_data}"
-                                },
-                            },
-                        ],
-                    }
-                    if image_data
-                    else {"role": "user", "content": prompt},
-                ],
-                api_key=VISION_API_KEY,
-                base_url=VISION_BASE_URL,
-                **kwargs,
-            )
-        else:
-            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)
-    # Define embedding function
-    embedding_func = EmbeddingFunc(
-        embedding_dim=1024,  # set to the embedding dimension of SiliconFlow's free embedding model
-        max_token_size=8192,
-        func=lambda texts: openai_embed(
-            texts,
-            model=EMBED_MODEL_NAME,
-            api_key=EMBED_API_KEY,
-            base_url=EMBED_BASE_URL,
-        ),
-    )
+    llm_model_func = create_llm_model_func()
+    vision_model_func = create_vision_model_func(llm_model_func)
+    embedding_func = create_embedding_func()
     # Initialize RAGAnything
     rag = RAGAnything(
         config=config,
         llm_model_func=llm_model_func,
@@ -90,7 +26,6 @@ async def main():
         embedding_func=embedding_func,
     )
-    # file_path = "D:\python\小乔证件\黄琬乔2023蓝桥杯省赛准考证.pdf"
     file_path = "./Txt/驿来特平台安全.docx"
     await rag.process_document_complete(
         file_path=file_path,

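Both scripts pull their model names, API keys, and base URLs from `from Config.Config import *`. For orientation, a minimal sketch of what Config/Config.py presumably contains follows; the constant names are taken from this diff, but every value is a placeholder (the removed comments above only say the vision and embedding models are SiliconFlow's free ones, and the removed lambda in the next file hard-codes "deepseek-chat"):

# Config/Config.py -- hypothetical sketch; only the constant names come from this commit,
# every value below is a placeholder to replace with your own.
LLM_MODEL_NAME = "deepseek-chat"            # text model; "deepseek-chat" appears in removed code below
LLM_API_KEY = "sk-..."
LLM_BASE_URL = "https://api.deepseek.com/v1"

VISION_MODEL_NAME = "your-vision-model"     # per the removed comment: SiliconFlow's free vision model
VISION_API_KEY = "sk-..."
VISION_BASE_URL = "https://api.siliconflow.cn/v1"

EMBED_MODEL_NAME = "your-embedding-model"   # must return 1024-dim vectors to match embedding_dim=1024
EMBED_API_KEY = "sk-..."
EMBED_BASE_URL = "https://api.siliconflow.cn/v1"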
@@ -1,111 +1,42 @@
 import asyncio
 import os
 from lightrag.kg.shared_storage import initialize_pipeline_status
 from raganything import RAGAnything
 from lightrag import LightRAG
-from lightrag.llm.openai import openai_complete_if_cache, openai_embed
-from lightrag.utils import EmbeddingFunc
-import os
-from Config.Config import *
+from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func
 async def load_existing_lightrag():
     # First, create or load an existing LightRAG instance
     lightrag_working_dir = "./rag_storage"
     # Check whether a previous LightRAG instance exists
     if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
         print("✅ Found an existing LightRAG instance, loading...")
     else:
         print("❌ No existing LightRAG instance found, a new one will be created")
     # Create/load the LightRAG instance with your configuration
+    llm_model_func = create_llm_model_func()
+    vision_model_func = create_vision_model_func(llm_model_func)
+    embedding_func = create_embedding_func()
     lightrag_instance = LightRAG(
         working_dir=lightrag_working_dir,
-        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
-            "deepseek-chat",
-            prompt,
-            system_prompt=system_prompt,
-            history_messages=history_messages,
-            api_key=LLM_API_KEY,
-            base_url=LLM_BASE_URL,
-            **kwargs,
-        ),
-        embedding_func=EmbeddingFunc(
-            embedding_dim=1024,
-            max_token_size=8192,
-            func=lambda texts: openai_embed(
-                texts,
-                model=EMBED_MODEL_NAME,
-                api_key=EMBED_API_KEY,
-                base_url=EMBED_BASE_URL,
-            ),
-        )
+        llm_model_func=llm_model_func,
+        embedding_func=embedding_func
     )
     # Initialize storages (this will load existing data if present)
     await lightrag_instance.initialize_storages()
     await initialize_pipeline_status()
-    # Define the vision model function for image processing
-    def vision_model_func(
-        prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
-    ):
-        if history_messages is None:
-            history_messages = []
-        if image_data:
-            return openai_complete_if_cache(
-                VISION_MODEL_NAME,
-                "",
-                system_prompt=None,
-                history_messages=[],
-                messages=[
-                    {"role": "system", "content": system_prompt}
-                    if system_prompt
-                    else None,
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "text", "text": prompt},
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:image/jpeg;base64,{image_data}"
-                                },
-                            },
-                        ],
-                    }
-                    if image_data
-                    else {"role": "user", "content": prompt},
-                ],
-                api_key=VISION_API_KEY,
-                base_url=VISION_BASE_URL,
-                **kwargs,
-            )
-        else:
-            return lightrag_instance.llm_model_func(prompt, system_prompt, history_messages, **kwargs)
     # Now initialize RAGAnything with the existing LightRAG instance
     rag = RAGAnything(
-        lightrag=lightrag_instance,  # pass in the existing LightRAG instance
+        lightrag=lightrag_instance,
         vision_model_func=vision_model_func,
         # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
     )
     # Query the existing knowledge base
     result = await rag.aquery(
-        # "黄琬乔的准考证信息告诉我一下?",
         "平台安全的保证方法有哪些?",
         mode="hybrid"
     )
     print("Query result:", result)
     # Add new multimodal documents to the existing LightRAG instance
     # await rag.process_document_complete(
     #     file_path="path/to/new/multimodal_document.pdf",
     #     output_dir="./output"
     # )
 if __name__ == "__main__":
     asyncio.run(load_existing_lightrag())

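A note on the aquery call above: the mode argument is forwarded to LightRAG, whose QueryParam also accepts "naive", "local", and "global" besides the "hybrid" used here. A minimal sketch for comparing modes on the same question (this would run inside load_existing_lightrag after the storages are initialized; the mode names are LightRAG's, everything else mirrors the script above):

# Hypothetical sketch: run the same query under each LightRAG retrieval mode.
# "hybrid" combines the "local" (entity-centric) and "global" (relationship-centric) paths.
for mode in ["naive", "local", "global", "hybrid"]:
    result = await rag.aquery("平台安全的保证方法有哪些?", mode=mode)
    print(f"[{mode}]", result[:200])  # show only the first 200 characters of each answer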
@@ -0,0 +1,69 @@
+import asyncio
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
+from Config.Config import *
+def create_llm_model_func():
+    # Factory for the text LLM callable shared by both scripts
+    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
+        return openai_complete_if_cache(
+            LLM_MODEL_NAME,
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            api_key=LLM_API_KEY,
+            base_url=LLM_BASE_URL,
+            **kwargs,
+        )
+    return llm_model_func
+def create_vision_model_func(llm_model_func):
+    # Factory for the vision callable; delegates to the text LLM when no image is given
+    def vision_model_func(
+        prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs
+    ):
+        if image_data:
+            return openai_complete_if_cache(
+                VISION_MODEL_NAME,
+                "",
+                system_prompt=None,
+                history_messages=[],
+                messages=[
+                    {"role": "system", "content": system_prompt}
+                    if system_prompt
+                    else None,
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{image_data}"
+                                },
+                            },
+                        ],
+                    }
+                    if image_data
+                    else {"role": "user", "content": prompt},
+                ],
+                api_key=VISION_API_KEY,
+                base_url=VISION_BASE_URL,
+                **kwargs,
+            )
+        else:
+            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)
+    return vision_model_func
+def create_embedding_func():
+    # Factory for the embedding function: 1024-dim vectors via an OpenAI-compatible endpoint
+    return EmbeddingFunc(
+        embedding_dim=1024,
+        max_token_size=8192,
+        func=lambda texts: openai_embed(
+            texts,
+            model=EMBED_MODEL_NAME,
+            api_key=EMBED_API_KEY,
+            base_url=EMBED_BASE_URL,
+        ),
+    )
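After this refactor, Util/RagUtil.py is the single place where model wiring lives, so any new script can reuse it. A minimal, hypothetical usage sketch follows (it assumes Config/Config.py supplies the constants RagUtil reads; openai_complete_if_cache and EmbeddingFunc are async in LightRAG, hence the awaits):

# Hypothetical usage sketch for Util/RagUtil.py, mirroring how the two scripts consume it.
import asyncio
from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func

async def demo():
    llm_model_func = create_llm_model_func()
    vision_model_func = create_vision_model_func(llm_model_func)  # falls back to llm_model_func when image_data is None
    embedding_func = create_embedding_func()
    answer = await llm_model_func("Summarize what RAG is in one sentence.")
    vectors = await embedding_func(["hello", "world"])
    print(answer)
    print(len(vectors[0]))  # should match embedding_dim=1024

if __name__ == "__main__":
    asyncio.run(demo())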