main
HuangHai 4 weeks ago
parent 1a8280a328
commit bc89a6cd6f

@ -2,7 +2,6 @@
conda activate rag
pip install PyPDF2
https://www.jianshu.com/p/d893d3dfd65a
https://www.jianshu.com/p/d893d3dfd65a
"""
from Util.PdfUtil import read_pdf_file

@ -0,0 +1,33 @@
"""
Extract the text content of PowerPoint (.pptx) files.

Dependency: pip install python-pptx
"""
from pptx import Presentation
import os
def _iter_shape_texts(shapes):
    """Yield the stripped, non-empty text of each shape in *shapes*.

    Walks the shape collection recursively so that text held in table
    cells and in shapes nested inside group shapes is included too.
    """
    for shape in shapes:
        if hasattr(shape, "text"):
            # Ordinary text-bearing shapes (text boxes, placeholders, ...).
            text = shape.text.strip()
            if text:  # only keep non-empty text
                yield text
        elif getattr(shape, "has_table", False):
            # A table lives in a graphic frame with no ``.text`` of its
            # own; its text is stored per cell.
            for row in shape.table.rows:
                for cell in row.cells:
                    cell_text = cell.text.strip()
                    if cell_text:
                        yield cell_text
        elif hasattr(shape, "shapes"):
            # A group shape has no text itself; descend into its members.
            yield from _iter_shape_texts(shape.shapes)


def extract_text_from_pptx(file_path):
    """Extract all text content from a .pptx file.

    Text is collected slide by slide, in shape order, from text-bearing
    shapes, table cells, and shapes nested inside group shapes.
    Surrounding whitespace is stripped and empty entries are skipped.

    Args:
        file_path: Path to the .pptx file; passed straight to
            ``Presentation``.

    Returns:
        The collected text pieces joined with newlines, or an empty
        string when the presentation contains no text.
    """
    prs = Presentation(file_path)
    text_content = []
    # Visit every slide and gather the text of all of its shapes.
    for slide in prs.slides:
        text_content.extend(_iter_shape_texts(slide.shapes))
    return '\n'.join(text_content)
if __name__ == "__main__":
    # Example usage: print the text of a sample presentation, or report
    # that the file is missing.
    pptx_file = "../Txt/东师理想智慧教学管理应用介绍.pptx"  # replace with the actual file path
    if not os.path.exists(pptx_file):
        print(f"文件 {pptx_file} 不存在")
    else:
        extracted = extract_text_from_pptx(pptx_file)
        print("提取的文本内容:")
        print(extracted)
Loading…
Cancel
Save