main
HuangHai 3 weeks ago
parent 0c94853adc
commit 3ce3f1afc3

@ -1,6 +1,4 @@
from docx import Document
from docx.oxml.shared import qn
from docx.oxml import parse_xml
def run_has_ole_object(run):
"""

@ -0,0 +1,32 @@
# https://www.e-iceblue.cn/doc_python_other/python-insert-or-extract-ole-objects-in-word.html
# pip install Spire.Doc
from spire.doc import *
from spire.doc.common import *
# Scan a Word document and print "equation" once for every embedded OLE
# object whose type string starts with "Equation.DSMT4".

# Create the Document object.
doc = Document()
try:
    # Load the Word document.
    doc.LoadFromFile(r'D:\dsWork\dsProject\dsRag\static\Txt\化学方程式_CHEMISTRY_1.docx')
    # Walk every section of the document.
    for k in range(doc.Sections.Count):
        sec = doc.Sections.get_Item(k)
        # Walk every child object of the section body.
        for j in range(sec.Body.ChildObjects.Count):
            obj = sec.Body.ChildObjects.get_Item(j)
            # Only paragraphs are inspected; other body children are skipped.
            if not isinstance(obj, Paragraph):
                continue
            # Walk every child object of the paragraph.
            for m in range(obj.ChildObjects.Count):
                o = obj.ChildObjects.get_Item(m)
                # Keep only OLE objects.
                if o.DocumentObjectType != DocumentObjectType.OleObject:
                    continue
                # Skip objects that are not exposed as DocOleObject instead of
                # dereferencing None, as the original conditional cast did.
                if not isinstance(o, DocOleObject):
                    continue
                object_type = o.ObjectType
                # Defensive: tolerate a missing/empty type string.
                if object_type and object_type.startswith("Equation.DSMT4"):
                    print("equation")
finally:
    # Always release the document, even if loading or traversal failed.
    doc.Close()
Loading…
Cancel
Save