main
HuangHai 2 weeks ago
parent b2335f4934
commit 6c20771fb7

@@ -1,19 +1,12 @@
 import asyncio
 import sys
 import loguru
 from raganything import RAGAnything, RAGAnythingConfig
-from lightrag.llm.openai import openai_complete_if_cache, openai_embed
-from lightrag.utils import EmbeddingFunc
 from Config.Config import *
+from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func
 async def main():
     # Configure logging at the start of main()
     loguru.logger.remove()  # remove the default handler
     loguru.logger.add(sys.stderr, level="INFO")  # log INFO and above to the console
     # Create RAGAnything configuration
     config = RAGAnythingConfig(
         working_dir="./rag_storage",
         mineru_parse_method="auto",
@@ -22,67 +15,10 @@ async def main():
         enable_equation_processing=True,
     )
-    # Define LLM model function
-    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
-        return openai_complete_if_cache(
-            LLM_MODEL_NAME,
-            prompt,
-            system_prompt=system_prompt,
-            history_messages=history_messages,
-            api_key=LLM_API_KEY,
-            base_url=LLM_BASE_URL,
-            **kwargs,
-        )
-    # Define vision model function for image processing
-    def vision_model_func(
-        prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs
-    ):
-        if image_data:
-            return openai_complete_if_cache(
-                VISION_MODEL_NAME,  # set to the model name of SiliconFlow's free vision model
-                "",
-                system_prompt=None,
-                history_messages=[],
-                messages=[
-                    {"role": "system", "content": system_prompt}
-                    if system_prompt
-                    else None,
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "text", "text": prompt},
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:image/jpeg;base64,{image_data}"
-                                },
-                            },
-                        ],
-                    }
-                    if image_data
-                    else {"role": "user", "content": prompt},
-                ],
-                api_key=VISION_API_KEY,
-                base_url=VISION_BASE_URL,
-                **kwargs,
-            )
-        else:
-            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)
-    # Define embedding function
-    embedding_func = EmbeddingFunc(
-        embedding_dim=1024,  # set to the embedding dimension of SiliconFlow's free embedding model
-        max_token_size=8192,
-        func=lambda texts: openai_embed(
-            texts,
-            model=EMBED_MODEL_NAME,
-            api_key=EMBED_API_KEY,
-            base_url=EMBED_BASE_URL,
-        ),
-    )
+    llm_model_func = create_llm_model_func()
+    vision_model_func = create_vision_model_func(llm_model_func)
+    embedding_func = create_embedding_func()
     # Initialize RAGAnything
     rag = RAGAnything(
         config=config,
         llm_model_func=llm_model_func,
@@ -90,7 +26,6 @@ async def main():
         embedding_func=embedding_func,
     )
-    # file_path = "D:\python\小乔证件\黄琬乔2023蓝桥杯省赛准考证.pdf"
     file_path = "./Txt/驿来特平台安全.docx"
     await rag.process_document_complete(
         file_path=file_path,

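Both scripts pull their model names, API keys, and base URLs from `from Config.Config import *`. For orientation, a minimal sketch of what Config/Config.py presumably contains follows; the constant names are taken from this diff, but every value is a placeholder (the removed comments above only say the vision and embedding models are SiliconFlow's free ones, and the removed lambda in the next file hard-codes "deepseek-chat"):

# Config/Config.py -- hypothetical sketch; only the constant names come from this commit,
# every value below is a placeholder to replace with your own.
LLM_MODEL_NAME = "deepseek-chat"            # text model; "deepseek-chat" appears in removed code below
LLM_API_KEY = "sk-..."
LLM_BASE_URL = "https://api.deepseek.com/v1"

VISION_MODEL_NAME = "your-vision-model"     # per the removed comment: SiliconFlow's free vision model
VISION_API_KEY = "sk-..."
VISION_BASE_URL = "https://api.siliconflow.cn/v1"

EMBED_MODEL_NAME = "your-embedding-model"   # must return 1024-dim vectors to match embedding_dim=1024
EMBED_API_KEY = "sk-..."
EMBED_BASE_URL = "https://api.siliconflow.cn/v1"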
@@ -1,111 +1,42 @@
 import asyncio
 import os
 from lightrag.kg.shared_storage import initialize_pipeline_status
 from raganything import RAGAnything
 from lightrag import LightRAG
-from lightrag.llm.openai import openai_complete_if_cache, openai_embed
-from lightrag.utils import EmbeddingFunc
-import os
-from Config.Config import *
+from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func
 async def load_existing_lightrag():
     # First, create or load an existing LightRAG instance
     lightrag_working_dir = "./rag_storage"
     # Check whether a previous LightRAG instance exists
     if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
         print("✅ Found an existing LightRAG instance, loading...")
     else:
         print("❌ No existing LightRAG instance found, a new one will be created")
     # Create/load the LightRAG instance with your configuration
+    llm_model_func = create_llm_model_func()
+    vision_model_func = create_vision_model_func(llm_model_func)
+    embedding_func = create_embedding_func()
     lightrag_instance = LightRAG(
         working_dir=lightrag_working_dir,
-        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
-            "deepseek-chat",
-            prompt,
-            system_prompt=system_prompt,
-            history_messages=history_messages,
-            api_key=LLM_API_KEY,
-            base_url=LLM_BASE_URL,
-            **kwargs,
-        ),
-        embedding_func=EmbeddingFunc(
-            embedding_dim=1024,
-            max_token_size=8192,
-            func=lambda texts: openai_embed(
-                texts,
-                model=EMBED_MODEL_NAME,
-                api_key=EMBED_API_KEY,
-                base_url=EMBED_BASE_URL,
-            ),
-        )
+        llm_model_func=llm_model_func,
+        embedding_func=embedding_func
     )
     # Initialize storages (this will load existing data if present)
     await lightrag_instance.initialize_storages()
     await initialize_pipeline_status()
-    # Define the vision model function for image processing
-    def vision_model_func(
-        prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
-    ):
-        if history_messages is None:
-            history_messages = []
-        if image_data:
-            return openai_complete_if_cache(
-                VISION_MODEL_NAME,
-                "",
-                system_prompt=None,
-                history_messages=[],
-                messages=[
-                    {"role": "system", "content": system_prompt}
-                    if system_prompt
-                    else None,
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "text", "text": prompt},
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:image/jpeg;base64,{image_data}"
-                                },
-                            },
-                        ],
-                    }
-                    if image_data
-                    else {"role": "user", "content": prompt},
-                ],
-                api_key=VISION_API_KEY,
-                base_url=VISION_BASE_URL,
-                **kwargs,
-            )
-        else:
-            return lightrag_instance.llm_model_func(prompt, system_prompt, history_messages, **kwargs)
     # Now initialize RAGAnything with the existing LightRAG instance
     rag = RAGAnything(
-        lightrag=lightrag_instance,  # pass in the existing LightRAG instance
+        lightrag=lightrag_instance,
         vision_model_func=vision_model_func,
         # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
     )
     # Query the existing knowledge base
     result = await rag.aquery(
-        # "黄琬乔的准考证信息告诉我一下?",
         "平台安全的保证方法有哪些?",
         mode="hybrid"
     )
     print("Query result:", result)
     # Add new multimodal documents to the existing LightRAG instance
     # await rag.process_document_complete(
     #     file_path="path/to/new/multimodal_document.pdf",
     #     output_dir="./output"
     # )
 if __name__ == "__main__":
     asyncio.run(load_existing_lightrag())

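A note on the aquery call above: the mode argument is forwarded to LightRAG, whose QueryParam also accepts "naive", "local", and "global" besides the "hybrid" used here. A minimal sketch for comparing modes on the same question (this would run inside load_existing_lightrag after the storages are initialized; the mode names are LightRAG's, everything else mirrors the script above):

# Hypothetical sketch: run the same query under each LightRAG retrieval mode.
# "hybrid" combines the "local" (entity-centric) and "global" (relationship-centric) paths.
for mode in ["naive", "local", "global", "hybrid"]:
    result = await rag.aquery("平台安全的保证方法有哪些?", mode=mode)
    print(f"[{mode}]", result[:200])  # show only the first 200 characters of each answer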
@@ -0,0 +1,69 @@
+import asyncio
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
+from Config.Config import *
+def create_llm_model_func():
+    # Factory for the text LLM callable shared by both scripts
+    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
+        return openai_complete_if_cache(
+            LLM_MODEL_NAME,
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            api_key=LLM_API_KEY,
+            base_url=LLM_BASE_URL,
+            **kwargs,
+        )
+    return llm_model_func
+def create_vision_model_func(llm_model_func):
+    # Factory for the vision callable; delegates to the text LLM when no image is given
+    def vision_model_func(
+        prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs
+    ):
+        if image_data:
+            return openai_complete_if_cache(
+                VISION_MODEL_NAME,
+                "",
+                system_prompt=None,
+                history_messages=[],
+                messages=[
+                    {"role": "system", "content": system_prompt}
+                    if system_prompt
+                    else None,
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{image_data}"
+                                },
+                            },
+                        ],
+                    }
+                    if image_data
+                    else {"role": "user", "content": prompt},
+                ],
+                api_key=VISION_API_KEY,
+                base_url=VISION_BASE_URL,
+                **kwargs,
+            )
+        else:
+            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)
+    return vision_model_func
+def create_embedding_func():
+    # Factory for the embedding function: 1024-dim vectors via an OpenAI-compatible endpoint
+    return EmbeddingFunc(
+        embedding_dim=1024,
+        max_token_size=8192,
+        func=lambda texts: openai_embed(
+            texts,
+            model=EMBED_MODEL_NAME,
+            api_key=EMBED_API_KEY,
+            base_url=EMBED_BASE_URL,
+        ),
+    )
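After this refactor, Util/RagUtil.py is the single place where model wiring lives, so any new script can reuse it. A minimal, hypothetical usage sketch follows (it assumes Config/Config.py supplies the constants RagUtil reads; openai_complete_if_cache and EmbeddingFunc are async in LightRAG, hence the awaits):

# Hypothetical usage sketch for Util/RagUtil.py, mirroring how the two scripts consume it.
import asyncio
from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func

async def demo():
    llm_model_func = create_llm_model_func()
    vision_model_func = create_vision_model_func(llm_model_func)  # falls back to llm_model_func when image_data is None
    embedding_func = create_embedding_func()
    answer = await llm_model_func("Summarize what RAG is in one sentence.")
    vectors = await embedding_func(["hello", "world"])
    print(answer)
    print(len(vectors[0]))  # should match embedding_dim=1024

if __name__ == "__main__":
    asyncio.run(demo())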