main
HuangHai 3 weeks ago
parent 88cc9c18e4
commit 0c94853adc

@ -1,19 +1,25 @@
from docx import Document
from docx.oxml.shared import qn
from docx.oxml import parse_xml
def run_has_ole_object(run):
    """
    Check whether a run contains an OLE object.

    Only the direct children of the run's XML element are inspected. A child
    counts as an OLE object when its tag name ends with ``object`` or
    ``OLEObject``; the first matching child is printed before returning.

    :param run: docx.text.run.Run object (anything exposing ``_r.iterchildren()``)
    :return: bool -- True if a matching child element was found, else False
    """
    # Look for OLE object tags among the children of the run's XML element
    run_element = run._r
    for child in run_element.iterchildren():
        tag = child.tag
        # Non-element nodes (e.g. XML comments or processing instructions in
        # lxml) do not carry a string tag; skip them rather than crash on
        # ``.endswith``.
        if not isinstance(tag, str):
            continue
        # str.endswith accepts a tuple, so both suffixes are tested in one call
        if tag.endswith(('object', 'OLEObject')):
            print(str(child))
            return True
    return False
def extract_text_from_docx(file_path):
    """
    Print the text of every non-empty run in a Word document.

    The document is opened with ``Document(file_path)``; each run whose text
    is non-empty is collected in document order and printed with a 1-based
    index.

    :param file_path: path of the .docx file, passed straight to ``Document``
    :return: None
    """
    document = Document(file_path)
    # Keep only runs that actually carry text
    run_texts = [
        run.text
        for paragraph in document.paragraphs
        for run in paragraph.runs
        if run.text
    ]
    # Print the extracted formulas, numbered from 1
    for number, text in enumerate(run_texts, start=1):
        print(f'公式 {number}: {text}')
# The path can be replaced with the path to your own Word document.
# Raw string: keeps the Windows backslashes literal and avoids the
# invalid-escape-sequence warning (\d, \T, ...); the resulting path value is
# the same as before.
extract_text_from_docx(r'D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx')
# Test code: report every run in the document that contains an OLE object
doc = Document(r'D:\dsWork\dsProject\dsRag\static\Txt\化学方程式_CHEMISTRY_1.docx')
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        if run_has_ole_object(run):
            print("Found Ole")

Loading…
Cancel
Save