You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

37 lines
1.3 KiB

import zipfile
import xml.etree.ElementTree as ET
def parse_docx(docx_path):
    """Print and return the text of every Office Math formula in a .docx file.

    The archive member ``word/document.xml`` is parsed and searched at any
    depth for ``m:oMath`` elements (floating and inline formulas alike).
    The text of a formula is the concatenation, in document order, of all
    non-empty ``m:t`` descendants of that element.

    For each formula a line ``公式<N>内容: <text>`` is printed, followed by a
    final summary line with the total count.

    Args:
        docx_path: Path (or file-like object) accepted by ``zipfile.ZipFile``.

    Returns:
        A list with one string per formula, in document order.

    Raises:
        zipfile.BadZipFile: If ``docx_path`` is not a valid zip archive.
        KeyError: If the archive has no ``word/document.xml`` member.
        xml.etree.ElementTree.ParseError: If the document XML is malformed.
    """
    ns = {
        'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
        'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
    }

    # ET.parse reads the whole member, so the archive can be closed before
    # the tree is walked.
    with zipfile.ZipFile(docx_path) as z:
        with z.open('word/document.xml') as f:
            root = ET.parse(f).getroot()

    formulas = []
    # Find all formulas (including floating and inline formulas).
    for formula_count, o_math in enumerate(root.findall('.//m:oMath', ns), start=1):
        # Only m:t nodes carry formula text. Container elements such as m:e
        # hold their content in nested m:t nodes; their own .text is at most
        # inter-tag whitespace, so it is deliberately not appended.
        formula_text = ''.join(
            t.text for t in o_math.findall('.//m:t', ns) if t.text
        )
        formulas.append(formula_text)
        print(f"公式{formula_count}内容: {formula_text}")

    print(f"共找到{len(formulas)}个公式")
    return formulas
if __name__ == "__main__":
    # Script entry point: run the extractor against a fixed sample document.
    docx_path = r"D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx"
    parse_docx(docx_path)