From 3ce3f1afc37d8aace7f32300682e053f34138709 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Mon, 30 Jun 2025 16:50:42 +0800
Subject: [PATCH] 'commit'

---
Review note: the TestReadSpire.py payload below was edited by hand after
review; the blob id on its "index" line still names the originally
committed content.

 dsRag/Test/TestReadGongShi.py |  2 --
 dsRag/Test/TestReadSpire.py   | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)
 create mode 100644 dsRag/Test/TestReadSpire.py

diff --git a/dsRag/Test/TestReadGongShi.py b/dsRag/Test/TestReadGongShi.py
index 4f1f0752..658f9b81 100644
--- a/dsRag/Test/TestReadGongShi.py
+++ b/dsRag/Test/TestReadGongShi.py
@@ -1,6 +1,4 @@
 from docx import Document
-from docx.oxml.shared import qn
-from docx.oxml import parse_xml
 
 def run_has_ole_object(run):
     """
diff --git a/dsRag/Test/TestReadSpire.py b/dsRag/Test/TestReadSpire.py
new file mode 100644
index 00000000..3b418278
--- /dev/null
+++ b/dsRag/Test/TestReadSpire.py
@@ -0,0 +1,35 @@
+# https://www.e-iceblue.cn/doc_python_other/python-insert-or-extract-ole-objects-in-word.html
+# pip install Spire.Doc
+from spire.doc import *
+from spire.doc.common import *
+
+# Create a Document object
+doc = Document()
+try:
+    # Load the Word document
+    doc.LoadFromFile(r'D:\dsWork\dsProject\dsRag\static\Txt\化学方程式_CHEMISTRY_1.docx')
+
+    # Walk every section of the document
+    for k in range(doc.Sections.Count):
+        sec = doc.Sections.get_Item(k)
+        # Walk every child object of the section body
+        for j in range(sec.Body.ChildObjects.Count):
+            par = sec.Body.ChildObjects.get_Item(j)
+            # Only paragraphs are inspected
+            if not isinstance(par, Paragraph):
+                continue
+            # Walk the child objects of the paragraph
+            for m in range(par.ChildObjects.Count):
+                o = par.ChildObjects.get_Item(m)
+                # Skip anything that is not flagged as an OLE object
+                if o.DocumentObjectType != DocumentObjectType.OleObject:
+                    continue
+                # Skip (rather than fail on None) if it is not a DocOleObject
+                if not isinstance(o, DocOleObject):
+                    continue
+                # Report OLE objects whose type starts with "Equation.DSMT4" (MathType equations)
+                if o.ObjectType.startswith("Equation.DSMT4"):
+                    print("equation")
+finally:
+    # Release the document even if loading or traversal raised
+    doc.Close()