From 6c20771fb7c9f9d94885574e7978d51183d486df Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Sun, 6 Jul 2025 19:42:58 +0800
Subject: [PATCH] 'commit'

---
 dsRagAnything/T1_Train.py                        | 75 +--------------
 dsRagAnything/T2_Query.py                        | 87 ++----------------
 dsRagAnything/Util/RagUtil.py                    | 69 ++++++++++++++
 dsRagAnything/Util/__init__.py                   |  0
 .../Util/__pycache__/RagUtil.cpython-310.pyc     | Bin 0 -> 1860 bytes
 .../Util/__pycache__/__init__.cpython-310.pyc    | Bin 0 -> 143 bytes
 6 files changed, 83 insertions(+), 148 deletions(-)
 create mode 100644 dsRagAnything/Util/RagUtil.py
 create mode 100644 dsRagAnything/Util/__init__.py
 create mode 100644 dsRagAnything/Util/__pycache__/RagUtil.cpython-310.pyc
 create mode 100644 dsRagAnything/Util/__pycache__/__init__.cpython-310.pyc

diff --git a/dsRagAnything/T1_Train.py b/dsRagAnything/T1_Train.py
index 4dd4acfa..a6f52177 100644
--- a/dsRagAnything/T1_Train.py
+++ b/dsRagAnything/T1_Train.py
@@ -1,19 +1,12 @@
 import asyncio
-import sys
-import loguru
 from raganything import RAGAnything, RAGAnythingConfig
-from lightrag.llm.openai import openai_complete_if_cache, openai_embed
-from lightrag.utils import EmbeddingFunc
-from Config.Config import *
+
+from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func
 
 
 async def main():
-    # Configure logging at the start of main
-    loguru.logger.remove()  # remove the default handler
-    loguru.logger.add(sys.stderr, level="INFO")  # log INFO and above to the console
-
-    # Create RAGAnything configuration
     config = RAGAnythingConfig(
         working_dir="./rag_storage",
         mineru_parse_method="auto",
@@ -22,67 +15,10 @@ async def main():
         enable_equation_processing=True,
     )
 
-    # Define LLM model function
-    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
-        return openai_complete_if_cache(
-            LLM_MODEL_NAME,
-            prompt,
-            system_prompt=system_prompt,
-            history_messages=history_messages,
-            api_key=LLM_API_KEY,
-            base_url=LLM_BASE_URL,
-            **kwargs,
-        )
-
-    # Define vision model function for image processing
-    def vision_model_func(
-        prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs
-    ):
-        if image_data:
-            return openai_complete_if_cache(
-                VISION_MODEL_NAME,  # set to the name of the free vision model on SiliconFlow
-                "",
-                system_prompt=None,
-                history_messages=[],
-                messages=[
-                    {"role": "system", "content": system_prompt}
-                    if system_prompt
-                    else None,
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "text", "text": prompt},
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:image/jpeg;base64,{image_data}"
-                                },
-                            },
-                        ],
-                    }
-                    if image_data
-                    else {"role": "user", "content": prompt},
-                ],
-                api_key=VISION_API_KEY,
-                base_url=VISION_BASE_URL,
-                **kwargs,
-            )
-        else:
-            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)
-
-    # Define embedding function
-    embedding_func = EmbeddingFunc(
-        embedding_dim=1024,  # embedding dimension of the free SiliconFlow model
-        max_token_size=8192,
-        func=lambda texts: openai_embed(
-            texts,
-            model=EMBED_MODEL_NAME,
-            api_key=EMBED_API_KEY,
-            base_url=EMBED_BASE_URL,
-        ),
-    )
+    llm_model_func = create_llm_model_func()
+    vision_model_func = create_vision_model_func(llm_model_func)
+    embedding_func = create_embedding_func()
 
-    # Initialize RAGAnything
     rag = RAGAnything(
         config=config,
         llm_model_func=llm_model_func,
@@ -90,7 +26,6 @@ async def main():
         embedding_func=embedding_func,
     )
 
-    # file_path = "D:\python\小乔证件\黄琬乔2023蓝桥杯省赛准考证.pdf"
     file_path = "./Txt/驿来特平台安全.docx"
     await rag.process_document_complete(
         file_path=file_path,
diff --git a/dsRagAnything/T2_Query.py b/dsRagAnything/T2_Query.py
index 0952d6a8..d3567372 100644
--- a/dsRagAnything/T2_Query.py
+++ b/dsRagAnything/T2_Query.py
@@ -1,111 +1,42 @@
 import asyncio
+import os
 from lightrag.kg.shared_storage import initialize_pipeline_status
 from raganything import RAGAnything
 from lightrag import LightRAG
-from lightrag.llm.openai import openai_complete_if_cache, openai_embed
-from lightrag.utils import EmbeddingFunc
-import os
-from Config.Config import *
-
+from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func
 
 
 async def load_existing_lightrag():
-    # First, create or load an existing LightRAG instance
     lightrag_working_dir = "./rag_storage"
 
-    # Check whether a previous LightRAG instance exists
     if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
         print("✅ 发现已存在的 LightRAG 实例,正在加载...")
     else:
         print("❌ 未找到已存在的 LightRAG 实例,将创建新实例")
 
-    # Create/load the LightRAG instance with your configuration
+    llm_model_func = create_llm_model_func()
+    vision_model_func = create_vision_model_func(llm_model_func)
+    embedding_func = create_embedding_func()
+
     lightrag_instance = LightRAG(
         working_dir=lightrag_working_dir,
-        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
-            "deepseek-chat",
-            prompt,
-            system_prompt=system_prompt,
-            history_messages=history_messages,
-            api_key=LLM_API_KEY,
-            base_url=LLM_BASE_URL,
-            **kwargs,
-        ),
-        embedding_func=EmbeddingFunc(
-            embedding_dim=1024,
-            max_token_size=8192,
-            func=lambda texts: openai_embed(
-                texts,
-                model=EMBED_MODEL_NAME,
-                api_key=EMBED_API_KEY,
-                base_url=EMBED_BASE_URL,
-            ),
-        )
+        llm_model_func=llm_model_func,
+        embedding_func=embedding_func
     )
 
-    # Initialize storages (existing data is loaded if present)
     await lightrag_instance.initialize_storages()
     await initialize_pipeline_status()
 
-    # Define the vision model function for image processing
-    def vision_model_func(
-        prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
-    ):
-        if history_messages is None:
-            history_messages = []
-        if image_data:
-            return openai_complete_if_cache(
-                VISION_MODEL_NAME,
-                "",
-                system_prompt=None,
-                history_messages=[],
-                messages=[
-                    {"role": "system", "content": system_prompt}
-                    if system_prompt
-                    else None,
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "text", "text": prompt},
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:image/jpeg;base64,{image_data}"
-                                },
-                            },
-                        ],
-                    }
-                    if image_data
-                    else {"role": "user", "content": prompt},
-                ],
-                api_key=VISION_API_KEY,
-                base_url=VISION_BASE_URL,
-                **kwargs,
-            )
-        else:
-            return lightrag_instance.llm_model_func(prompt, system_prompt, history_messages, **kwargs)
-
-    # Now initialize RAGAnything with the existing LightRAG instance
     rag = RAGAnything(
-        lightrag=lightrag_instance,  # pass in the existing LightRAG instance
+        lightrag=lightrag_instance,
         vision_model_func=vision_model_func,
-        # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
     )
 
-    # Query the existing knowledge base
     result = await rag.aquery(
-        # "黄琬乔的准考证信息告诉我一下?",
         "平台安全的保证方法有哪些?",
         mode="hybrid"
     )
     print("查询结果:", result)
 
-    # Add new multimodal documents to the existing LightRAG instance
-    # await rag.process_document_complete(
-    #     file_path="path/to/new/multimodal_document.pdf",
-    #     output_dir="./output"
-    # )
-
-
 if __name__ == "__main__":
     asyncio.run(load_existing_lightrag())
diff --git a/dsRagAnything/Util/RagUtil.py b/dsRagAnything/Util/RagUtil.py
new file mode 100644
index 00000000..e9c2d87c
--- /dev/null
+++ b/dsRagAnything/Util/RagUtil.py
@@ -0,0 +1,69 @@
+import asyncio
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
+from Config.Config import *
+
+
+def create_llm_model_func():
+    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
+        return openai_complete_if_cache(
+            LLM_MODEL_NAME,
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            api_key=LLM_API_KEY,
+            base_url=LLM_BASE_URL,
+            **kwargs,
+        )
+    return llm_model_func
+
+
+def create_vision_model_func(llm_model_func):
+    def vision_model_func(
+        prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs
+    ):
+        if image_data:
+            return openai_complete_if_cache(
+                VISION_MODEL_NAME,
+                "",
+                system_prompt=None,
+                history_messages=[],
+                messages=[
+                    {"role": "system", "content": system_prompt}
+                    if system_prompt
+                    else None,
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{image_data}"
+                                },
+                            },
+                        ],
+                    }
+                    if image_data
+                    else {"role": "user", "content": prompt},
+                ],
+                api_key=VISION_API_KEY,
+                base_url=VISION_BASE_URL,
+                **kwargs,
+            )
+        else:
+            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)
+    return vision_model_func
+
+
+def create_embedding_func():
+    return EmbeddingFunc(
+        embedding_dim=1024,
+        max_token_size=8192,
+        func=lambda texts: openai_embed(
+            texts,
+            model=EMBED_MODEL_NAME,
+            api_key=EMBED_API_KEY,
+            base_url=EMBED_BASE_URL,
+        ),
+    )
\ No newline at end of file
diff --git a/dsRagAnything/Util/__init__.py b/dsRagAnything/Util/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/dsRagAnything/Util/__pycache__/RagUtil.cpython-310.pyc b/dsRagAnything/Util/__pycache__/RagUtil.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c1afa31a9084eceba6aa27887375bab61944ddf0
GIT binary patch
literal 1860
[base85-encoded bytecode omitted]

literal 0
HcmV?d00001

diff --git a/dsRagAnything/Util/__pycache__/__init__.cpython-310.pyc b/dsRagAnything/Util/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d7cfc4fe6b6312c2c53e970954263b07e8eb3a8d
GIT binary patch
literal 143
[base85-encoded bytecode omitted]
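
Usage note (illustrative sketch, not part of the patch): the snippet below shows how the new Util/RagUtil.py factories are intended to be wired into RAGAnything, assuming Config/Config.py still provides the LLM_*/VISION_*/EMBED_* constants the helpers read. The demo() name is hypothetical; the file path and query string are the ones used in the scripts above.

    import asyncio

    from raganything import RAGAnything, RAGAnythingConfig

    from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func


    async def demo():
        # Build the three callables once; T1_Train.py and T2_Query.py now share this wiring.
        llm_model_func = create_llm_model_func()
        vision_model_func = create_vision_model_func(llm_model_func)
        embedding_func = create_embedding_func()

        rag = RAGAnything(
            config=RAGAnythingConfig(working_dir="./rag_storage"),  # other flags left at defaults
            llm_model_func=llm_model_func,
            vision_model_func=vision_model_func,
            embedding_func=embedding_func,
        )

        # Ingest a document, then query the resulting knowledge base.
        await rag.process_document_complete(
            file_path="./Txt/驿来特平台安全.docx",
            output_dir="./output",
        )
        print(await rag.aquery("平台安全的保证方法有哪些?", mode="hybrid"))


    if __name__ == "__main__":
        asyncio.run(demo())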