import os

import docx
from docx.oxml.ns import nsmap


def _paragraph_has_image(paragraph):
    """Return True if any run in *paragraph* has a child element whose tag ends with 'drawing'."""
    return any(
        child.tag.endswith('drawing')
        for run in paragraph.runs
        for child in run._element
    )


def read_word_content(docx_path):
    """Walk every paragraph of a Word document and print its text or an image marker.

    For each paragraph, in document order:

    * if any of its runs contains a drawing element, only the image marker
      is printed (any text in that same paragraph is not printed);
    * otherwise the paragraph text is printed with surrounding whitespace
      stripped, and paragraphs that are empty after stripping are skipped.

    Any exception raised while opening or traversing the document is caught
    and reported on stdout instead of propagating to the caller.

    Args:
        docx_path: Path of the .docx file, passed straight to docx.Document.
    """
    try:
        document = docx.Document(docx_path)

        for paragraph in document.paragraphs:
            # The image check comes first: a paragraph holding a drawing is
            # reported as an image regardless of the text it carries.
            if _paragraph_has_image(paragraph):
                print("【图片】")
                continue

            stripped_text = paragraph.text.strip()
            if stripped_text:
                print(stripped_text)

    except Exception as e:
        print(f"处理Word文档时出错: {str(e)}")


if __name__ == "__main__":
    # Example usage: replace the path below with the path of your own Word document.
    word_document_path = "d:\\dsWork\\dsProject\\dsRag\\static\\Test\\带图的WORD文档_MATH_3.docx"

    if not os.path.exists(word_document_path):
        print(f"文件不存在: {word_document_path}")
    else:
        read_word_content(word_document_path)