'commit'

4 weeks ago · 57a2ed2788
parent afc76083b3
commit 57a2ed2788
1 changed files with 8 additions and 4 deletions
--- a/dsRag/Test/TestReadWordContent.py
+++ b/dsRag/Test/TestReadWordContent.py
@@ -2,7 +2,9 @@ import os
 import docx
 from docx.oxml.ns import nsmap

+
 def read_word_content(docx_path):
+    idx = 0
    """遍历Word文档的每个段落，输出文字或图片标识"""
    try:
        doc = docx.Document(docx_path)
@@ -20,13 +22,15 @@ def read_word_content(docx_path):
                    break

            if has_image:
-                print("【图片】")
+                idx = idx + 1
+                print("【图片" + str(idx) + "】")
            elif paragraph.text.strip():
                print(paragraph.text.strip())

    except Exception as e:
        print(f"处理Word文档时出错: {str(e)}")

+
 if __name__ == "__main__":
    # 示例用法
    # 请将 'your_document.docx' 替换为你的Word文档路径