From 07086e3c0c2ef81cc116f7c31c2b4314f39d3c85 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Mon, 7 Jul 2025 10:26:35 +0800 Subject: [PATCH] 'commit' --- dsRagAnything/Doc/RagAnything配置.md | 85 +++++++++++--------------- 1 file changed, 35 insertions(+), 50 deletions(-) diff --git a/dsRagAnything/Doc/RagAnything配置.md b/dsRagAnything/Doc/RagAnything配置.md index aba3e52c..d72f1c8c 100644 --- a/dsRagAnything/Doc/RagAnything配置.md +++ b/dsRagAnything/Doc/RagAnything配置.md @@ -73,11 +73,11 @@ _HF_DEFAULT_ENDPOINT = "https://hf-mirror.com" -- **注意**:需要提前配置好环境变量后,再进入PyCharm进行调试,因为我发现,如果是在打开PyCharm的前提下,添加了环境就是PyCharm里面的代码是检测不到的。 +- **注意**:需要提前配置好环境变量后,再进入$PyCharm$进行调试,因为我发现,如果是在打开$PyCharm$的前提下,添加的环境变量$PyCharm$里面的代码是检测不到的。 -- 因为原版的程序有soffice.exe版本检测框弹出,不能直接用于生产环境,我只好手动修改了下代码: +- 因为原版的程序有$soffice.exe$版本检测框弹出,不能直接用于生产环境,手动修改代码: ```cmd D:\anaconda3\envs\raganything\Lib\site-packages\raganything\mineru_parser.py @@ -86,6 +86,37 @@ _HF_DEFAULT_ENDPOINT = "https://hf-mirror.com" 修改内容: ```python + # 62行 + @staticmethod + def _run_mineru_command( + input_path: Union[str, Path], + output_dir: Union[str, Path], + method: str = "auto", + lang: Optional[str] = None, + backend: str = "pipeline", + start_page: Optional[int] = None, + end_page: Optional[int] = None, + formula: bool = True, + table: bool = True, + device: Optional[str] = None, + # source: str = "huggingface", # 模型来源,默认 huggingface + # source: str = "modelscope", # 因为第一次手动从魔搭下载了模型,所以可以直接使用local模式 + source: str = "local" + ) -> None: + + # 107行 + try: + result = subprocess.run( + cmd, + #capture_output=True, #注释掉这句,可以把输出打印出来 + text=True, + check=True, + encoding="utf-8", + errors="ignore", + ) + print("MinerU command executed successfully") + + # 442行 # Check if LibreOffice is available #libreoffice_available = False working_libreoffice_cmd = 'soffice' @@ -143,56 +174,10 @@ _HF_DEFAULT_ENDPOINT = "https://hf-mirror.com" # "- CentOS/RHEL: sudo yum install libreoffice\n" # "Alternatively, convert the document to 
PDF manually.\n" # "MinerU 2.0 no longer includes built-in Office document conversion." - # ) + # ) ``` -- 首次运行时,代码会执行下面的类似命令 - -```cmd -mineru -p C:\\Users\\ADMINI~1\\AppData\\Local\\Temp\\tmpt2sl2vd1\\驿来特平台安全.pdf -o output -m auto -b pipeline --source modelscope -``` - -![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250706161400967.png) - -下载需要等待,但程序本身不显示进度,我一直以为卡住了,后来跟踪代码,才知道它是在下载模型。 - -- 修改源码: - -```cmd -D:\anaconda3\envs\raganything\Lib\site-packages\raganything\mineru_parser.py -``` - -```python -# 62行 -@staticmethod -def _run_mineru_command( - input_path: Union[str, Path], - output_dir: Union[str, Path], - method: str = "auto", - lang: Optional[str] = None, - backend: str = "pipeline", - start_page: Optional[int] = None, - end_page: Optional[int] = None, - formula: bool = True, - table: bool = True, - device: Optional[str] = None, - source: str = "local", # 'huggingface' --> 'local' -) -> None: - -# 107行 -try: - result = subprocess.run( - cmd, - #capture_output=True, #注释掉这句,可以把输出打印出来 - text=True, - check=True, - encoding="utf-8", - errors="ignore", - ) - print("MinerU command executed successfully") -``` - - + #### 五、相关资料