You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
37 lines
1.3 KiB
37 lines
1.3 KiB
import zipfile
|
|
import xml.etree.ElementTree as ET
|
|
|
|
def parse_docx(docx_path):
    """Extract the text of every Office Math formula in a .docx file.

    Opens the archive at ``docx_path``, parses ``word/document.xml`` and
    visits every ``m:oMath`` element in the document (display and inline
    formulas alike).  For each formula the text of all descendant ``m:t``
    runs is concatenated, one line per formula is printed, and a final
    summary line with the total count is printed.

    Args:
        docx_path: Path (or file-like object) accepted by ``zipfile.ZipFile``.

    Returns:
        A list with the concatenated text of each formula, in document order.

    Raises:
        zipfile.BadZipFile: If ``docx_path`` is not a valid zip archive.
        KeyError: If the archive has no ``word/document.xml`` member.
        xml.etree.ElementTree.ParseError: If the document XML is malformed.
    """
    ns = {
        'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
        'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
    }

    # Only the parse needs the archive open; the tree is fully in memory afterwards.
    with zipfile.ZipFile(docx_path) as archive:
        with archive.open('word/document.xml') as xml_file:
            root = ET.parse(xml_file).getroot()

    formulas = []
    # Find every formula (both display and inline ones).
    for index, o_math in enumerate(root.findall('.//m:oMath', ns), start=1):
        # Text runs carry the visible characters of the formula.
        parts = [t.text for t in o_math.findall('.//m:t', ns) if t.text]

        # m:e wraps nested math content, so its direct text is normally just
        # layout whitespace from pretty-printed XML; keep only non-blank text
        # so that whitespace does not leak into the result.
        parts.extend(
            e.text
            for e in o_math.findall('.//m:e', ns)
            if e.text and e.text.strip()
        )

        formula_text = ''.join(parts)
        print(f"公式{index}内容: {formula_text}")
        formulas.append(formula_text)

    print(f"共找到{len(formulas)}个公式")
    return formulas
|
|
|
|
if __name__ == "__main__":
    import sys

    # Default sample document; pass a path as the first command-line
    # argument to analyse a different file.
    docx_path = r'D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx'
    if len(sys.argv) > 1:
        docx_path = sys.argv[1]
    parse_docx(docx_path)