|
|
|
@ -5,6 +5,7 @@ import loguru
|
|
|
|
|
from raganything import RAGAnything, RAGAnythingConfig
|
|
|
|
|
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
|
|
|
|
|
from lightrag.utils import EmbeddingFunc
|
|
|
|
|
from Config.Config import *
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def main():
|
|
|
|
@ -12,10 +13,6 @@ async def main():
|
|
|
|
|
loguru.logger.remove() # 移除默认配置
|
|
|
|
|
loguru.logger.add(sys.stderr, level="INFO") # 输出INFO及以上级别到控制台
|
|
|
|
|
|
|
|
|
|
# Set up API configuration
|
|
|
|
|
api_key = "sk-44ae895eeb614aa1a9c6460579e322f1"
|
|
|
|
|
base_url = "https://api.deepseek.com" # Optional
|
|
|
|
|
|
|
|
|
|
# Create RAGAnything configuration
|
|
|
|
|
config = RAGAnythingConfig(
|
|
|
|
|
working_dir="./rag_storage",
|
|
|
|
@ -28,12 +25,12 @@ async def main():
|
|
|
|
|
# Define LLM model function
|
|
|
|
|
def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
    """Relay a chat-completion request to the configured LLM backend.

    Thin wrapper around ``openai_complete_if_cache`` that injects the
    model name, API key and base URL from ``Config.Config`` so callers
    only supply the conversational payload.

    Args:
        prompt: The user prompt to send to the model.
        system_prompt: Optional system message prepended to the conversation.
        history_messages: Prior conversation turns; defaults to an empty
            list (``None`` sentinel avoids the shared-mutable-default bug).
        **kwargs: Passed through to ``openai_complete_if_cache``
            (e.g. temperature, max tokens, cache controls).

    Returns:
        Whatever ``openai_complete_if_cache`` returns (the completion text
        or an awaitable thereof, depending on the library version).
    """
    # NOTE(review): the diff residue carried both the old hard-coded
    # "deepseek-chat" / inline credentials and the new config constants as
    # duplicate arguments; only the config-driven (added) side is kept.
    return openai_complete_if_cache(
        LLM_MODEL_NAME,
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages if history_messages is not None else [],
        api_key=LLM_API_KEY,
        base_url=LLM_BASE_URL,
        **kwargs,
    )
|
|
|
|
|
|
|
|
|
@ -43,7 +40,7 @@ async def main():
|
|
|
|
|
):
|
|
|
|
|
if image_data:
|
|
|
|
|
return openai_complete_if_cache(
|
|
|
|
|
"GLM-4.1V-9B-Thinking", # 这里设置为了硅基流动中免费视觉模型的模型名称
|
|
|
|
|
VISION_MODEL_NAME, # 这里设置为了硅基流动中免费视觉模型的模型名称
|
|
|
|
|
"",
|
|
|
|
|
system_prompt=None,
|
|
|
|
|
history_messages=[],
|
|
|
|
@ -66,8 +63,8 @@ async def main():
|
|
|
|
|
if image_data
|
|
|
|
|
else {"role": "user", "content": prompt},
|
|
|
|
|
],
|
|
|
|
|
api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
|
|
|
|
|
base_url="https://api.siliconflow.cn/v1/chat/completions",
|
|
|
|
|
api_key=VISION_API_KEY,
|
|
|
|
|
base_url=VISION_BASE_URL,
|
|
|
|
|
**kwargs,
|
|
|
|
|
)
|
|
|
|
|
else:
|
|
|
|
@ -79,9 +76,9 @@ async def main():
|
|
|
|
|
max_token_size=8192,
|
|
|
|
|
func=lambda texts: openai_embed(
|
|
|
|
|
texts,
|
|
|
|
|
model="BAAI/bge-m3",
|
|
|
|
|
api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
|
|
|
|
|
base_url="https://api.siliconflow.cn/v1",
|
|
|
|
|
model=EMBED_MODEL_NAME,
|
|
|
|
|
api_key=EMBED_API_KEY,
|
|
|
|
|
base_url=EMBED_BASE_URL,
|
|
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
@ -93,39 +90,16 @@ async def main():
|
|
|
|
|
embedding_func=embedding_func,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
#file_path = "D:\python\小乔证件\黄琬乔2023蓝桥杯省赛准考证.pdf"
|
|
|
|
|
# file_path = "D:\python\小乔证件\黄琬乔2023蓝桥杯省赛准考证.pdf"
|
|
|
|
|
file_path = "./Txt/驿来特平台安全.docx"
|
|
|
|
|
# Process a document
|
|
|
|
|
await rag.process_document_complete(
|
|
|
|
|
file_path=file_path,
|
|
|
|
|
output_dir="./output",
|
|
|
|
|
parse_method="auto"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("Processing complete.")
|
|
|
|
|
|
|
|
|
|
# 调用LightRAG的代码,包括:1、创建索引,2、查询知识库,不应该放在这个模块中完成,这里只需要完成到文本解析完成即可。
|
|
|
|
|
# Query the processed content
|
|
|
|
|
# Pure text query - for basic knowledge base search
|
|
|
|
|
# text_result = await rag.aquery(
|
|
|
|
|
# "这个文档是做什么用的?",
|
|
|
|
|
# mode="hybrid"
|
|
|
|
|
# )
|
|
|
|
|
# print("Text query result:", text_result)
|
|
|
|
|
|
|
|
|
|
# # Multimodal query with specific multimodal content
|
|
|
|
|
# multimodal_result = await rag.aquery_with_multimodal(
|
|
|
|
|
# "Explain this formula and its relevance to the document content",
|
|
|
|
|
# multimodal_content=[{
|
|
|
|
|
# "type": "equation",
|
|
|
|
|
# "latex": "P(d|q) = \\frac{P(q|d) \\cdot P(d)}{P(q)}",
|
|
|
|
|
# "equation_caption": "Document relevance probability"
|
|
|
|
|
# }],
|
|
|
|
|
# mode="hybrid"
|
|
|
|
|
# )
|
|
|
|
|
# print("Multimodal query result:", multimodal_result)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Script entry point: run the async ingestion pipeline defined in main().
# The guard keeps it from executing when this module is imported elsewhere.
if __name__ == "__main__":
    asyncio.run(main())
|
|
|
|
|