From 0c94853adce2d99391504b091ee49a8204812db4 Mon Sep 17 00:00:00 2001
From: HuangHai <10402852@qq.com>
Date: Mon, 30 Jun 2025 16:42:22 +0800
Subject: [PATCH] Replace run-text extraction with OLE object detection in TestReadGongShi

---
 dsRag/Test/TestReadGongShi.py | 36 ++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/dsRag/Test/TestReadGongShi.py b/dsRag/Test/TestReadGongShi.py
index 499c48ef..4f1f0752 100644
--- a/dsRag/Test/TestReadGongShi.py
+++ b/dsRag/Test/TestReadGongShi.py
@@ -1,19 +1,25 @@
 from docx import Document
+from docx.oxml.shared import qn
+from docx.oxml import parse_xml
 
+def run_has_ole_object(run):
+    """Return True when *run* has a direct child element that is an OLE object.
+
+    :param run: docx.text.run.Run object; only its ``_r`` XML element is read.
+    :return: bool -- True if a child's local tag name is ``object`` or ``OLEObject``.
+    """
+    for child in run._r.iterchildren():
+        # Comment/PI nodes expose a non-str ``tag``; skip them instead of
+        # raising. The exact local name is compared (namespace stripped) so
+        # tags that merely end in "object" are not reported.
+        if isinstance(child.tag, str) and child.tag.rpartition('}')[2] in ('object', 'OLEObject'):
+            return True
+    return False
 
-def extract_text_from_docx(file_path):
-    doc = Document(file_path)
-    formulas = []
 
-    for para in doc.paragraphs:
-        for run in para.runs:
-            if run.text:  # 检查文本是否存在
-                formulas.append(run.text)
-
-    # 打印提取的公式
-    for index, formula in enumerate(formulas):
-        print(f'公式 {index + 1}: {formula}')
-
-
-# 路径可替换为您的 Word 文档路径
-extract_text_from_docx('D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx')
\ No newline at end of file
+# Smoke test: scan every run of the sample document for embedded OLE objects.
+doc = Document(r'D:\dsWork\dsProject\dsRag\static\Txt\化学方程式_CHEMISTRY_1.docx')
+all_runs = (run for paragraph in doc.paragraphs for run in paragraph.runs)
+for run in all_runs:
+    if run_has_ole_object(run):
+        print("Found Ole")