From 6c20771fb7c9f9d94885574e7978d51183d486df Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Sun, 6 Jul 2025 19:42:58 +0800
Subject: [PATCH] 'commit'

---
 dsRagAnything/T1_Train.py                        | 75 +--------------
 dsRagAnything/T2_Query.py                        | 87 ++----------------
 dsRagAnything/Util/RagUtil.py                    | 69 ++++++++++++++
 dsRagAnything/Util/__init__.py                   |  0
 .../Util/__pycache__/RagUtil.cpython-310.pyc     | Bin 0 -> 1860 bytes
 .../Util/__pycache__/__init__.cpython-310.pyc    | Bin 0 -> 143 bytes
 6 files changed, 83 insertions(+), 148 deletions(-)
 create mode 100644 dsRagAnything/Util/RagUtil.py
 create mode 100644 dsRagAnything/Util/__init__.py
 create mode 100644 dsRagAnything/Util/__pycache__/RagUtil.cpython-310.pyc
 create mode 100644 dsRagAnything/Util/__pycache__/__init__.cpython-310.pyc

diff --git a/dsRagAnything/T1_Train.py b/dsRagAnything/T1_Train.py
index 4dd4acfa..a6f52177 100644
--- a/dsRagAnything/T1_Train.py
+++ b/dsRagAnything/T1_Train.py
@@ -1,19 +1,12 @@
 import asyncio
-import sys
-import loguru
 from raganything import RAGAnything, RAGAnythingConfig
-from lightrag.llm.openai import openai_complete_if_cache, openai_embed
-from lightrag.utils import EmbeddingFunc
-from Config.Config import *
+
+from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func
 
 
 async def main():
-    # Configure logging at the start of main
-    loguru.logger.remove()  # remove the default handler
-    loguru.logger.add(sys.stderr, level="INFO")  # log INFO and above to the console
-
-    # Create RAGAnything configuration
     config = RAGAnythingConfig(
         working_dir="./rag_storage",
         mineru_parse_method="auto",
@@ -22,67 +15,10 @@ async def main():
         enable_equation_processing=True,
     )
 
-    # Define LLM model function
-    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
-        return openai_complete_if_cache(
-            LLM_MODEL_NAME,
-            prompt,
-            system_prompt=system_prompt,
-            history_messages=history_messages,
-            api_key=LLM_API_KEY,
-            base_url=LLM_BASE_URL,
-            **kwargs,
-        )
-
-    # Define vision model function for image processing
-    def vision_model_func(
-        prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs
-    ):
-        if image_data:
-            return openai_complete_if_cache(
-                VISION_MODEL_NAME,  # set to the name of the free vision model on SiliconFlow
-                "",
-                system_prompt=None,
-                history_messages=[],
-                messages=[
-                    {"role": "system", "content": system_prompt}
-                    if system_prompt
-                    else None,
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "text", "text": prompt},
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:image/jpeg;base64,{image_data}"
-                                },
-                            },
-                        ],
-                    }
-                    if image_data
-                    else {"role": "user", "content": prompt},
-                ],
-                api_key=VISION_API_KEY,
-                base_url=VISION_BASE_URL,
-                **kwargs,
-            )
-        else:
-            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)
-
-    # Define embedding function
-    embedding_func = EmbeddingFunc(
-        embedding_dim=1024,  # embedding dimension of the free SiliconFlow model
-        max_token_size=8192,
-        func=lambda texts: openai_embed(
-            texts,
-            model=EMBED_MODEL_NAME,
-            api_key=EMBED_API_KEY,
-            base_url=EMBED_BASE_URL,
-        ),
-    )
+    llm_model_func = create_llm_model_func()
+    vision_model_func = create_vision_model_func(llm_model_func)
+    embedding_func = create_embedding_func()
 
-    # Initialize RAGAnything
     rag = RAGAnything(
         config=config,
         llm_model_func=llm_model_func,
@@ -90,7 +26,6 @@ async def main():
         embedding_func=embedding_func,
     )
 
-    # file_path = "D:\python\小乔证件\黄琬乔2023蓝桥杯省赛准考证.pdf"
     file_path = "./Txt/驿来特平台安全.docx"
     await rag.process_document_complete(
         file_path=file_path,
diff --git a/dsRagAnything/T2_Query.py b/dsRagAnything/T2_Query.py
index 0952d6a8..d3567372 100644
--- a/dsRagAnything/T2_Query.py
+++ b/dsRagAnything/T2_Query.py
@@ -1,111 +1,42 @@
 import asyncio
+import os
 from lightrag.kg.shared_storage import initialize_pipeline_status
 from raganything import RAGAnything
 from lightrag import LightRAG
-from lightrag.llm.openai import openai_complete_if_cache, openai_embed
-from lightrag.utils import EmbeddingFunc
-import os
-from Config.Config import *
-
+from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func
 
 
 async def load_existing_lightrag():
-    # First, create or load an existing LightRAG instance
     lightrag_working_dir = "./rag_storage"
 
-    # Check whether a previous LightRAG instance exists
     if os.path.exists(lightrag_working_dir) and os.listdir(lightrag_working_dir):
         print("✅ 发现已存在的 LightRAG 实例,正在加载...")
     else:
         print("❌ 未找到已存在的 LightRAG 实例,将创建新实例")
 
-    # Create/load the LightRAG instance with your configuration
+    llm_model_func = create_llm_model_func()
+    vision_model_func = create_vision_model_func(llm_model_func)
+    embedding_func = create_embedding_func()
+
     lightrag_instance = LightRAG(
         working_dir=lightrag_working_dir,
-        llm_model_func=lambda prompt, system_prompt=None, history_messages=[], **kwargs: openai_complete_if_cache(
-            "deepseek-chat",
-            prompt,
-            system_prompt=system_prompt,
-            history_messages=history_messages,
-            api_key=LLM_API_KEY,
-            base_url=LLM_BASE_URL,
-            **kwargs,
-        ),
-        embedding_func=EmbeddingFunc(
-            embedding_dim=1024,
-            max_token_size=8192,
-            func=lambda texts: openai_embed(
-                texts,
-                model=EMBED_MODEL_NAME,
-                api_key=EMBED_API_KEY,
-                base_url=EMBED_BASE_URL,
-            ),
-        )
+        llm_model_func=llm_model_func,
+        embedding_func=embedding_func
     )
 
-    # Initialize storages (existing data is loaded if present)
     await lightrag_instance.initialize_storages()
     await initialize_pipeline_status()
 
-    # Define the vision model function for image processing
-    def vision_model_func(
-        prompt, system_prompt=None, history_messages=None, image_data=None, **kwargs
-    ):
-        if history_messages is None:
-            history_messages = []
-        if image_data:
-            return openai_complete_if_cache(
-                VISION_MODEL_NAME,
-                "",
-                system_prompt=None,
-                history_messages=[],
-                messages=[
-                    {"role": "system", "content": system_prompt}
-                    if system_prompt
-                    else None,
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "text", "text": prompt},
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": f"data:image/jpeg;base64,{image_data}"
-                                },
-                            },
-                        ],
-                    }
-                    if image_data
-                    else {"role": "user", "content": prompt},
-                ],
-                api_key=VISION_API_KEY,
-                base_url=VISION_BASE_URL,
-                **kwargs,
-            )
-        else:
-            return lightrag_instance.llm_model_func(prompt, system_prompt, history_messages, **kwargs)
-
-    # Now initialize RAGAnything with the existing LightRAG instance
     rag = RAGAnything(
-        lightrag=lightrag_instance,  # pass in the existing LightRAG instance
+        lightrag=lightrag_instance,
         vision_model_func=vision_model_func,
-        # Note: working_dir, llm_model_func, embedding_func, etc. are inherited from lightrag_instance
     )
 
-    # Query the existing knowledge base
     result = await rag.aquery(
-        # "黄琬乔的准考证信息告诉我一下?",
         "平台安全的保证方法有哪些?",
         mode="hybrid"
     )
     print("查询结果:", result)
 
-    # Add new multimodal documents to the existing LightRAG instance
-    # await rag.process_document_complete(
-    #     file_path="path/to/new/multimodal_document.pdf",
-    #     output_dir="./output"
-    # )
-
-
 if __name__ == "__main__":
     asyncio.run(load_existing_lightrag())
diff --git a/dsRagAnything/Util/RagUtil.py b/dsRagAnything/Util/RagUtil.py
new file mode 100644
index 00000000..e9c2d87c
--- /dev/null
+++ b/dsRagAnything/Util/RagUtil.py
@@ -0,0 +1,69 @@
+import asyncio
+from lightrag.llm.openai import openai_complete_if_cache, openai_embed
+from lightrag.utils import EmbeddingFunc
+from Config.Config import *
+
+
+def create_llm_model_func():
+    def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwargs):
+        return openai_complete_if_cache(
+            LLM_MODEL_NAME,
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            api_key=LLM_API_KEY,
+            base_url=LLM_BASE_URL,
+            **kwargs,
+        )
+    return llm_model_func
+
+
+def create_vision_model_func(llm_model_func):
+    def vision_model_func(
+        prompt, system_prompt=None, history_messages=[], image_data=None, **kwargs
+    ):
+        if image_data:
+            return openai_complete_if_cache(
+                VISION_MODEL_NAME,
+                "",
+                system_prompt=None,
+                history_messages=[],
+                messages=[
+                    {"role": "system", "content": system_prompt}
+                    if system_prompt
+                    else None,
+                    {
+                        "role": "user",
+                        "content": [
+                            {"type": "text", "text": prompt},
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/jpeg;base64,{image_data}"
+                                },
+                            },
+                        ],
+                    }
+                    if image_data
+                    else {"role": "user", "content": prompt},
+                ],
+                api_key=VISION_API_KEY,
+                base_url=VISION_BASE_URL,
+                **kwargs,
+            )
+        else:
+            return llm_model_func(prompt, system_prompt, history_messages, **kwargs)
+    return vision_model_func
+
+
+def create_embedding_func():
+    return EmbeddingFunc(
+        embedding_dim=1024,
+        max_token_size=8192,
+        func=lambda texts: openai_embed(
+            texts,
+            model=EMBED_MODEL_NAME,
+            api_key=EMBED_API_KEY,
+            base_url=EMBED_BASE_URL,
+        ),
+    )
\ No newline at end of file
diff --git a/dsRagAnything/Util/__init__.py b/dsRagAnything/Util/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/dsRagAnything/Util/__pycache__/RagUtil.cpython-310.pyc b/dsRagAnything/Util/__pycache__/RagUtil.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c1afa31a9084eceba6aa27887375bab61944ddf0
GIT binary patch
literal 1860
[base85-encoded bytecode omitted]

literal 0
HcmV?d00001

diff --git a/dsRagAnything/Util/__pycache__/__init__.cpython-310.pyc b/dsRagAnything/Util/__pycache__/__init__.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d7cfc4fe6b6312c2c53e970954263b07e8eb3a8d
GIT binary patch
literal 143
[base85-encoded bytecode omitted]
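
Usage note (illustrative sketch, not part of the patch): the snippet below shows how the new Util/RagUtil.py factories are intended to be wired into RAGAnything, assuming Config/Config.py still provides the LLM_*/VISION_*/EMBED_* constants the helpers read. The demo() name is hypothetical; the file path and query string are the ones used in the scripts above.

    import asyncio

    from raganything import RAGAnything, RAGAnythingConfig

    from Util.RagUtil import create_llm_model_func, create_vision_model_func, create_embedding_func


    async def demo():
        # Build the three callables once; T1_Train.py and T2_Query.py now share this wiring.
        llm_model_func = create_llm_model_func()
        vision_model_func = create_vision_model_func(llm_model_func)
        embedding_func = create_embedding_func()

        rag = RAGAnything(
            config=RAGAnythingConfig(working_dir="./rag_storage"),  # other flags left at defaults
            llm_model_func=llm_model_func,
            vision_model_func=vision_model_func,
            embedding_func=embedding_func,
        )

        # Ingest a document, then query the resulting knowledge base.
        await rag.process_document_complete(
            file_path="./Txt/驿来特平台安全.docx",
            output_dir="./output",
        )
        print(await rag.aquery("平台安全的保证方法有哪些?", mode="hybrid"))


    if __name__ == "__main__":
        asyncio.run(demo())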