import os
import docx
from docx.oxml.ns import nsmap

def _paragraph_has_image(paragraph):
    """Return True if any run of *paragraph* directly contains a drawing element.

    Only the direct children of each run in ``paragraph.runs`` are inspected,
    so drawings nested deeper in the XML are not detected here.
    """
    for run in paragraph.runs:
        for child in run._element:
            tag = child.tag
            # Comment / processing-instruction nodes expose a non-string
            # ``tag`` in lxml; skip them instead of failing on ``.endswith``.
            if isinstance(tag, str) and tag.endswith('drawing'):
                return True
    return False


def read_word_content(docx_path):
    """Walk every paragraph of a Word document and print its content.

    For each paragraph, the stripped text is printed when it is non-empty,
    and the marker ``【图片】`` is printed when one of its runs holds a
    drawing. A paragraph that has both text and a drawing produces both
    lines (text first, regardless of where the drawing sits in the
    paragraph), so its text is no longer lost.

    Args:
        docx_path: Path (or file-like object) passed to ``docx.Document``.

    Returns:
        None. All output goes to stdout.

    Any exception raised while opening or walking the document is caught
    and reported on stdout instead of being propagated.
    """
    try:
        doc = docx.Document(docx_path)

        for paragraph in doc.paragraphs:
            text = paragraph.text.strip()
            if text:
                print(text)
            if _paragraph_has_image(paragraph):
                print("【图片】")

    except Exception as e:
        # Deliberate best-effort boundary: report the problem and return
        # rather than crash the caller.
        print(f"处理Word文档时出错: {e}")

if __name__ == "__main__":
    # Example usage: pass the Word document path as the first command-line
    # argument, or run without arguments to fall back to the sample path.
    import sys  # local import: only needed when executed as a script

    default_document_path = "d:\\dsWork\\dsProject\\dsRag\\static\\Test\\带图的WORD文档_MATH_3.docx"
    word_document_path = sys.argv[1] if len(sys.argv) > 1 else default_document_path

    # Check up front so a missing file gets a specific message instead of
    # the generic error report printed by read_word_content.
    if os.path.exists(word_document_path):
        read_word_content(word_document_path)
    else:
        print(f"文件不存在: {word_document_path}")