'commit'

2 weeks ago · 19bc923a43
parent f1513355ed
commit 19bc923a43
3 changed files with 25 additions and 37 deletions
--- a/dsRagAnything/Config/Config.py
+++ b/dsRagAnything/Config/Config.py
@ -0,0 +1,14 @@
 # Central configuration for the embedding, chat (LLM) and vision model endpoints.
 #
 # API keys are secrets and must not be committed to source control, so they are
 # read from environment variables of the same name. A missing key yields an
 # empty string so that importing this module never fails; set the variables
 # before running anything that actually calls the remote services.
 # NOTE(review): the keys that were previously committed here should be treated
 # as leaked and rotated with the providers.
 #
 # Non-secret settings (model names, base URLs) keep their previous values as
 # defaults and may optionally be overridden through the environment as well.
 import os as _os  # private alias: `from Config.Config import *` skips underscore names
 # Embedding model
 EMBED_MODEL_NAME = _os.environ.get("EMBED_MODEL_NAME", "BAAI/bge-m3")
 EMBED_API_KEY = _os.environ.get("EMBED_API_KEY", "")
 EMBED_BASE_URL = _os.environ.get("EMBED_BASE_URL", "https://api.siliconflow.cn/v1")
 # Large language model
 LLM_API_KEY = _os.environ.get("LLM_API_KEY", "")
 LLM_BASE_URL = _os.environ.get("LLM_BASE_URL", "https://api.deepseek.com")
 LLM_MODEL_NAME = _os.environ.get("LLM_MODEL_NAME", "deepseek-chat")
 # Vision model
 # The vision and embedding services share the same provider host, and the
 # original configuration used one key for both, so the vision key falls back
 # to the embedding key when VISION_API_KEY is not set explicitly.
 VISION_API_KEY = _os.environ.get("VISION_API_KEY", EMBED_API_KEY)
 # NOTE(review): this URL already ends in "/chat/completions"; if the client
 # appends that path to base_url itself, it should probably be
 # "https://api.siliconflow.cn/v1" like EMBED_BASE_URL -- confirm before changing.
 VISION_BASE_URL = _os.environ.get(
     "VISION_BASE_URL", "https://api.siliconflow.cn/v1/chat/completions"
 )
 VISION_MODEL_NAME = _os.environ.get("VISION_MODEL_NAME", "GLM-4.1V-9B-Thinking")
--- a/dsRagAnything/Config/init.py
+++ b/dsRagAnything/Config/init.py
--- a/dsRagAnything/T1_Train.py
+++ b/dsRagAnything/T1_Train.py
@ -5,6 +5,7 @@ import loguru
 from raganything import RAGAnything, RAGAnythingConfig
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
 from lightrag.utils import EmbeddingFunc
 from Config.Config import *
 async def main():
@ -12,10 +13,6 @@ async def main():
    loguru.logger.remove()  # 移除默认配置
    loguru.logger.add(sys.stderr, level="INFO")  # 输出INFO及以上级别到控制台
    # Set up API configuration
    api_key = "sk-44ae895eeb614aa1a9c6460579e322f1"
    base_url = "https://api.deepseek.com"  # Optional
    # Create RAGAnything configuration
    config = RAGAnythingConfig(
        working_dir="./rag_storage",
@ -28,12 +25,12 @@ async def main():
    # Define LLM model function
    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
        return openai_complete_if_cache(
-            "deepseek-chat",
+            LLM_MODEL_NAME,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages,
-            api_key=api_key,
+            api_key=LLM_API_KEY,
-            base_url=base_url,
+            base_url=LLM_BASE_URL,
            **kwargs,
        )
@ -43,7 +40,7 @@ async def main():
    ):
        if image_data:
            return openai_complete_if_cache(
-                "GLM-4.1V-9B-Thinking",  # 这里设置为了硅基流动中免费视觉模型的模型名称
+                VISION_MODEL_NAME,  # 这里设置为了硅基流动中免费视觉模型的模型名称
                "",
                system_prompt=None,
                history_messages=[],
@ -66,8 +63,8 @@ async def main():
                    if image_data
                    else {"role": "user", "content": prompt},
                ],
-                api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
+                api_key=VISION_API_KEY,
-                base_url="https://api.siliconflow.cn/v1/chat/completions",
+                base_url=VISION_BASE_URL,
                **kwargs,
            )
        else:
@ -79,9 +76,9 @@ async def main():
        max_token_size=8192,
        func=lambda texts: openai_embed(
            texts,
-            model="BAAI/bge-m3",
+            model=EMBED_MODEL_NAME,
-            api_key="sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl",
+            api_key=EMBED_API_KEY,
-            base_url="https://api.siliconflow.cn/v1",
+            base_url=EMBED_BASE_URL,
        ),
    )
@ -93,39 +90,16 @@ async def main():
        embedding_func=embedding_func,
    )
-    #file_path = "D:\python\小乔证件\黄琬乔2023蓝桥杯省赛准考证.pdf"
+    # file_path = "D:\python\小乔证件\黄琬乔2023蓝桥杯省赛准考证.pdf"
    file_path = "./Txt/驿来特平台安全.docx"
    # Process a document
    await rag.process_document_complete(
        file_path=file_path,
        output_dir="./output",
        parse_method="auto"
    )
    print("Processing complete.")
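    # Calling LightRAG to (1) build the index and (2) query the knowledge base does not belong in this module; this script only needs to run up to the point where document parsing is complete.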
    # Query the processed content
    # Pure text query - for basic knowledge base search
    # text_result = await rag.aquery(
    #     "这个文档是做什么用的?",
    #     mode="hybrid"
    # )
    # print("Text query result:", text_result)
    # # Multimodal query with specific multimodal content
    # multimodal_result = await rag.aquery_with_multimodal(
    #     "Explain this formula and its relevance to the document content",
    #     multimodal_content=[{
    #         "type": "equation",
    #         "latex": "P(d|q) = \\frac{P(q|d) \\cdot P(d)}{P(q)}",
    #         "equation_caption": "Document relevance probability"
    #     }],
    #     mode="hybrid"
    # )
    # print("Multimodal query result:", multimodal_result)
 if __name__ == "__main__":
    asyncio.run(main())