import os


def read_pdf_file(file_path):
    """Read a PDF file and return the text extracted from all of its pages.

    The text of each page is joined with a newline and the combined result
    is stripped of leading/trailing whitespace.

    This function never raises: any failure (missing file, wrong extension,
    missing PyPDF2 dependency, unreadable or corrupt PDF) is printed to
    stdout and reported to the caller as ``None``.

    :param file_path: Path to the PDF file (``str`` or ``os.PathLike``).
    :return: The extracted text, or ``None`` if the file could not be read.
    """
    try:
        # Normalize os.PathLike objects (e.g. pathlib.Path) to a plain path
        # so the extension check below works for them as well.
        path = os.fspath(file_path)

        # Make sure the file exists before trying to open it.
        if not os.path.exists(path):
            raise FileNotFoundError(f"文件 {file_path} 不存在")

        # Only files with a .pdf extension (case-insensitive) are accepted.
        if not path.lower().endswith('.pdf'):
            raise ValueError("仅支持.pdf格式的文件")

        # Imported lazily so that a missing dependency is reported through
        # the same error path as every other failure of this function.
        import PyPDF2

        # PDFs must be opened in binary mode; extraction has to finish
        # while the file handle is still open.
        with open(path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # A page without extractable text may yield None/empty; treat it
            # as an empty string instead of failing the whole document.
            page_texts = [page.extract_text() or "" for page in reader.pages]

        return "\n".join(page_texts).strip()

    except Exception as e:
        # Deliberate best-effort contract: log the problem and return None.
        print(f"读取PDF文件时出错: {str(e)}")
        return None