From 77234939e1cf52d59b72b99ab9181e27dfa4cb16 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Mon, 30 Jun 2025 07:52:25 +0800 Subject: [PATCH] 'commit' --- dsRag/Util/PdfUtil.py | 34 ------------------ .../Util/__pycache__/PdfUtil.cpython-310.pyc | Bin 892 -> 0 bytes 2 files changed, 34 deletions(-) delete mode 100644 dsRag/Util/PdfUtil.py delete mode 100644 dsRag/Util/__pycache__/PdfUtil.cpython-310.pyc diff --git a/dsRag/Util/PdfUtil.py b/dsRag/Util/PdfUtil.py deleted file mode 100644 index bdf85000..00000000 --- a/dsRag/Util/PdfUtil.py +++ /dev/null @@ -1,34 +0,0 @@ -import PyPDF2 -import os - - -def read_pdf_file(file_path): - """ - 读取PDF文件内容 - :param file_path: PDF文件路径 - :return: 文档文本内容 - """ - try: - # 检查文件是否存在 - if not os.path.exists(file_path): - raise FileNotFoundError(f"文件 {file_path} 不存在") - - # 检查文件是否为PDF - if not file_path.lower().endswith('.pdf'): - raise ValueError("仅支持.pdf格式的文件") - - text = "" - - # 以二进制模式打开PDF文件 - with open(file_path, 'rb') as file: - reader = PyPDF2.PdfReader(file) - - # 逐页读取内容 - for page in reader.pages: - text += page.extract_text() + "\n" - - return text.strip() - - except Exception as e: - print(f"读取PDF文件时出错: {str(e)}") - return None \ No newline at end of file diff --git a/dsRag/Util/__pycache__/PdfUtil.cpython-310.pyc b/dsRag/Util/__pycache__/PdfUtil.cpython-310.pyc deleted file mode 100644 index d539d23446d721a333a31b8c857515f6832f706c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 892 zcmZ8g%}*0S6n`_j+iuskAO|tXp$9MZK%y6u#;A!vJP`uX#s))bXMkequCuc#kgZV* za)EFm_yHtsylA3w!w-mmfPcbVr4{}KW7Id45aQdsk9qI+-oE!H<0cXgAg#>hrj`wW zANp{bC?0Mi`@JX_Fj9hCTaz4Nlo<<Ao0#}q-W>)%55zoVGHEn%9} zX$TGCcSwVg5H^6(Eu%)Bp0kkF5%+K-vkoo-T!Su)W({T_5frY^^Wt z@9s38FE&4ZIf`VwyvWa_CW<9K?&alFCUt^3__W>pw$MfiE(1}{q%gnLc%$X!`%_hd zDE_HkI~C}CruILtG(Ws-ZoZv2`#d%g=-b)fU2Lswx0au3PpiJy+*|$mdZC@BKvD5P z5oajqLFj)Cw_fZtm%bjZy~?E0or=1?GIg^m!e@)V^wrrx?0?9WgKkh}146h$StYl^ zg|c~>`IRDKoxAx`z}rF7^>|rDhUakDE4kP(n;7AF#zoHZ@{`=p#rdoh`GOpmXei5< zqUfpEz-)ngvgnqT<%wchDgy)QNG^7=J!KvxnA)KtqFobPo#cw}cOcH;Xus0^nK9;P zUGWgruyCh&L83L1pB%d*i={E_M(aLrP9=1SaopCpu4rMle-QgGcF5C{JMS_zJ3xm*d%WJ E18r6ibN~PV