import zipfile
import xml.etree.ElementTree as ET

# Namespace prefixes used in the XPath queries against word/document.xml.
_NAMESPACES = {
    'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
}


def parse_docx(docx_path):
    """Extract the text of every Office Math formula in a .docx file.

    Opens the archive at *docx_path*, parses ``word/document.xml`` and
    visits every ``m:oMath`` element in document order (both display and
    inline formulas). For each formula the text of all descendant ``m:t``
    nodes is concatenated, the result is printed, and finally the total
    number of formulas is printed.

    Args:
        docx_path: Path (or file-like object) accepted by
            ``zipfile.ZipFile``.

    Returns:
        A list with one string per formula, in document order. The list
        is empty when the document contains no ``m:oMath`` element.

    Raises:
        zipfile.BadZipFile: If *docx_path* is not a valid zip archive.
        KeyError: If the archive has no ``word/document.xml`` member.
        xml.etree.ElementTree.ParseError: If that member is not
            well-formed XML.
    """
    with zipfile.ZipFile(docx_path) as archive:
        with archive.open('word/document.xml') as document_xml:
            root = ET.parse(document_xml).getroot()

    formulas = []
    # Find every formula, whether it is a display or an inline one.
    for index, o_math in enumerate(root.findall('.//m:oMath', _NAMESPACES), start=1):
        # Only m:t nodes carry formula characters. Structural wrappers
        # such as m:e hold their content in nested m:t nodes; their own
        # .text is at most inter-tag whitespace, so it is not collected.
        formula_text = ''.join(
            node.text
            for node in o_math.iterfind('.//m:t', _NAMESPACES)
            if node.text
        )
        formulas.append(formula_text)
        print(f"公式{index}内容: {formula_text}")

    print(f"共找到{len(formulas)}个公式")
    return formulas


if __name__ == "__main__":
    docx_path = r'D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx'
    parse_docx(docx_path)