# conda activate rag
# pip install pptx2md
import os
import re
import subprocess
# Source .pptx file to convert
source_pptx = r'D:\dsWork\dsProject\dsRag\Test\楚雄州教育信息化2.0建设项目--智慧教学支撑系统介绍.pptx'
# Target Markdown file written by pptx2md
target_md = r'C:\logs\output.md'
# Directory where pptx2md stores the extracted images
img_path = r'C:\logs\img'
# Command to run, kept as an argument list so that paths containing spaces or
# shell metacharacters are passed to pptx2md unchanged (no shell involved).
cmd = ['pptx2md', source_pptx, '-o', target_md, '-i', img_path]


def clean_line(line):
    """Return the printable text of one Markdown line, or '' to drop it.

    Image lines are dropped entirely. For every other line the Markdown
    decorations ``__`` and ``---``, all spaces and all backslashes are
    removed (in that order), and the result is stripped of surrounding
    whitespace.

    Args:
        line: One raw line read from the generated Markdown file.

    Returns:
        The cleaned text, or an empty string when the line is an image or
        contains nothing printable after cleaning.
    """
    # Filter out images, i.e. lines starting with "![".
    # NOTE(review): the second prefix was truncated in the original source;
    # '<img' (an HTML image tag) is assumed here -- confirm.
    if line.startswith(('![', '<img')):
        return ''
    # NOTE(review): this pattern matches only the empty string, so the call
    # removes nothing; the intended pattern appears to have been lost --
    # confirm and restore it.
    line = re.sub(r'|', '', line)
    # The order of the replacements is significant ('---' is removed before
    # spaces, so "- - -" is not collapsed into a rule and removed).
    line = (
        line.replace("__", "")
        .replace("---", "")
        .replace(" ", "")
        .replace("\\", "")
    )
    return line.strip()


def main():
    """Convert the presentation to Markdown and print its cleaned text.

    Raises:
        subprocess.CalledProcessError: If pptx2md exits with a non-zero
            status, so that a stale or missing output file is not processed.
        FileNotFoundError: If the pptx2md executable or the generated
            Markdown file cannot be found.
    """
    subprocess.run(cmd, check=True)
    # Read the generated Markdown line by line and print what remains
    # after cleaning.
    with open(target_md, 'r', encoding='utf-8') as file:
        for line in file:
            text = clean_line(line)
            if text:
                print(text)


if __name__ == '__main__':
    main()