diff --git a/dsRagAnything/Config/Config.py b/dsRagAnything/Config/Config.py
new file mode 100644
index 00000000..ac8373ff
--- /dev/null
+++ b/dsRagAnything/Config/Config.py
@@ -0,0 +1,14 @@
+# Embedding model
+EMBED_MODEL_NAME = "BAAI/bge-m3"
+EMBED_API_KEY = "sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl"
+EMBED_BASE_URL = "https://api.siliconflow.cn/v1"
+
+# LLM
+LLM_API_KEY = "sk-44ae895eeb614aa1a9c6460579e322f1"
+LLM_BASE_URL = "https://api.deepseek.com"
+LLM_MODEL_NAME = "deepseek-chat"
+
+# Vision model
+VISION_API_KEY = "sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl"
+VISION_BASE_URL = "https://api.siliconflow.cn/v1/chat/completions"
+VISION_MODEL_NAME = "GLM-4.1V-9B-Thinking"
\ No newline at end of file
diff --git a/dsRagAnything/Config/__init__.py b/dsRagAnything/Config/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/dsRagAnything/T1_Train.py b/dsRagAnything/T1_Train.py
index bd26d528..958fc24e 100644
--- a/dsRagAnything/T1_Train.py
+++ b/dsRagAnything/T1_Train.py
@@ -5,6 +5,7 @@ import loguru
 from raganything import RAGAnything, RAGAnythingConfig
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
 from lightrag.utils import EmbeddingFunc
+from Config.Config import *
 
 
 async def main():
@@ -12,10 +13,6 @@ async def main():
     loguru.logger.remove()  # Remove the default handler
     loguru.logger.add(sys.stderr, level="INFO")  # Log INFO and above to the console
 
-    # Set up API configuration
-    api_key = "sk-44ae895eeb614aa1a9c6460579e322f1"
-    base_url = "https://api.deepseek.com"  # Optional
-
     # Create RAGAnything configuration
     config = RAGAnythingConfig(
         working_dir="./rag_storage",
@@ -28,12 +25,12 @@ async def main():
     # Define LLM model function
     def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
         return openai_complete_if_cache(
-            "deepseek-chat",
+            LLM_MODEL_NAME,
             prompt,
             system_prompt=system_prompt,
             history_messages=history_messages,
-            api_key=api_key,
-            base_url=base_url,
+            api_key=LLM_API_KEY,
+            base_url=LLM_BASE_URL,
             **kwargs,
         )
 
@@ -43,7 +40,7 @@ async def main():
     ):
         if image_data:
             return openai_complete_if_cache(
-                "GLM-4.1V-9B-Thinking",  # Set to the model name of the free vision model on SiliconFlow
+                VISION_MODEL_NAME,  # Set to the model name of the free vision model on SiliconFlow
                 "",
                 system_prompt=None,
                 history_messages=[],
@@ -66,8 +63,8 @@ async def main():
                     if image_data
                     else {"role": "user", "content": prompt},
                 ],
-                api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
-                base_url="https://api.siliconflow.cn/v1/chat/completions",
+                api_key=VISION_API_KEY,
+                base_url=VISION_BASE_URL,
                 **kwargs,
             )
         else:
@@ -79,9 +76,9 @@ async def main():
         max_token_size=8192,
         func=lambda texts: openai_embed(
             texts,
-            model="BAAI/bge-m3",
-            api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
-            base_url="https://api.siliconflow.cn/v1",
+            model=EMBED_MODEL_NAME,
+            api_key=EMBED_API_KEY,
+            base_url=EMBED_BASE_URL,
         ),
     )
 
@@ -93,39 +90,16 @@ async def main():
         embedding_func=embedding_func,
     )
 
-    #file_path = "D:\python\小乔证件\黄琬乔2023蓝桥杯省赛准考证.pdf"
+    # file_path = "D:\python\小乔证件\黄琬乔2023蓝桥杯省赛准考证.pdf"
     file_path = "./Txt/驿来特平台安全.docx"
 
-    # Process a document
     await rag.process_document_complete(
         file_path=file_path,
        output_dir="./output",
         parse_method="auto"
     )
-
     print("Processing complete.")
-
     # Code that calls LightRAG (1. building the index, 2. querying the knowledge base) should not live in this module; this module only needs to take things as far as document parsing.
-    # Query the processed content
-    # Pure text query - for basic knowledge base search
-    # text_result = await rag.aquery(
-    #     "这个文档是做什么用的?",
-    #     mode="hybrid"
-    # )
-    # print("Text query result:", text_result)
-
-    # # Multimodal query with specific multimodal content
-    # multimodal_result = await rag.aquery_with_multimodal(
-    #     "Explain this formula and its relevance to the document content",
-    #     multimodal_content=[{
-    #         "type": "equation",
-    #         "latex": "P(d|q) = \\frac{P(q|d) \\cdot P(d)}{P(q)}",
-    #         "equation_caption": "Document relevance probability"
-    #     }],
-    #     mode="hybrid"
-    # )
-    # print("Multimodal query result:", multimodal_result)
-
 
 if __name__ == "__main__":
     asyncio.run(main())
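The comment left in T1_Train.py says that index creation and knowledge-base queries belong in a separate module. Below is a minimal sketch of what such a follow-up module could look like, reusing the Config constants and the `aquery` call that this diff removes; the file name `T2_Query.py` is hypothetical, and the sketch assumes the stores written under `./rag_storage` by T1_Train.py are picked up again when the same working_dir is reused.

```python
# T2_Query.py -- hypothetical companion module, not part of this diff.
# Assumes T1_Train.py has already parsed a document into ./rag_storage
# and reuses the same LLM and embedding setup from Config.Config.
import asyncio

from raganything import RAGAnything, RAGAnythingConfig
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
from Config.Config import *


async def main():
    # Same working_dir as the training script, so existing storage is reused.
    config = RAGAnythingConfig(working_dir="./rag_storage")

    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
        return openai_complete_if_cache(
            LLM_MODEL_NAME,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
            api_key=LLM_API_KEY,
            base_url=LLM_BASE_URL,
            **kwargs,
        )

    embedding_func = EmbeddingFunc(
        embedding_dim=1024,  # bge-m3 embeddings are 1024-dimensional
        max_token_size=8192,
        func=lambda texts: openai_embed(
            texts,
            model=EMBED_MODEL_NAME,
            api_key=EMBED_API_KEY,
            base_url=EMBED_BASE_URL,
        ),
    )

    rag = RAGAnything(
        config=config,
        llm_model_func=llm_model_func,
        embedding_func=embedding_func,
    )

    # Pure text query against the existing knowledge base; this is the same
    # aquery call that was removed from T1_Train.py in this diff.
    text_result = await rag.aquery("这个文档是做什么用的?", mode="hybrid")
    print("Text query result:", text_result)


if __name__ == "__main__":
    asyncio.run(main())
```

Splitting parsing (T1_Train.py) and querying (a module like this) keeps the training script's job limited to producing the storage, and any number of query scripts can then reuse it.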