parent
88cc9c18e4
commit
0c94853adc
@ -1,19 +1,25 @@
|
||||
from docx import Document
|
||||
from docx.oxml.shared import qn
|
||||
from docx.oxml import parse_xml
|
||||
|
||||
def run_has_ole_object(run):
    """
    Check whether a run contains an OLE object.

    Only the direct children of the run's XML element are inspected. A child
    counts as an OLE object when its tag name ends with ``object`` or
    ``OLEObject``; the first matching child is printed before returning.

    :param run: docx.text.run.Run object
    :return: bool, True if a matching child element is found, else False
    """
    # Look through the run's XML for an OLE object tag.
    for child in run._r.iterchildren():
        tag = child.tag
        # In lxml, comment and processing-instruction nodes report a callable
        # rather than a string as their tag; skip them instead of failing on
        # ``.endswith``.
        if not isinstance(tag, str):
            continue
        if tag.endswith(('object', 'OLEObject')):
            print(str(child))
            return True
    return False
|
||||
|
||||
def extract_text_from_docx(file_path):
    """
    Print the text of every non-empty run in a Word document.

    :param file_path: path of the .docx file to open
    :return: None; each collected run text is printed on its own numbered line
    """
    document = Document(file_path)

    # Gather the text of each run, skipping runs whose text is empty.
    run_texts = [
        run.text
        for para in document.paragraphs
        for run in para.runs
        if run.text
    ]

    # Print the collected entries, numbered from 1.
    for number, text in enumerate(run_texts, start=1):
        print(f'公式 {number}: {text}')
|
||||
|
||||
|
||||
# Replace with the path to your own Word document.
# Raw string: the Windows path contains backslash sequences (\d, \T, ...) that
# are not valid escapes and trigger a warning in a normal string literal.
extract_text_from_docx(r'D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx')

# Test code: print a marker for every run that contains an OLE object.
doc = Document(r'D:\dsWork\dsProject\dsRag\static\Txt\化学方程式_CHEMISTRY_1.docx')
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        if run_has_ole_object(run):
            print("Found Ole")
|
||||
|
Loading…
Reference in new issue