You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

37 lines
1.3 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import os
import docx
from docx.oxml.ns import nsmap
def _paragraph_has_drawing(paragraph):
    """Return True if any run in *paragraph* has a direct child tagged '...drawing'."""
    return any(
        # Comment / processing-instruction nodes expose a non-string ``tag``;
        # skip them instead of failing on ``.endswith`` and aborting the scan.
        isinstance(child.tag, str) and child.tag.endswith('drawing')
        for run in paragraph.runs
        for child in run._element
    )


def read_word_content(docx_path):
    """Walk each paragraph of a Word document, printing its text or an image marker.

    For every paragraph in ``doc.paragraphs``:

    * if one of its runs contains a drawing element, print the marker
      "【图片】" (the paragraph's text is not printed in that case);
    * otherwise print the paragraph text with surrounding whitespace
      stripped, provided it is non-empty.

    Paragraphs that are blank and contain no drawing produce no output.

    Args:
        docx_path: Path handed to ``docx.Document``.

    Returns:
        None. Any exception raised while opening or scanning the document is
        reported on stdout instead of being propagated.
    """
    try:
        doc = docx.Document(docx_path)
        for paragraph in doc.paragraphs:
            if _paragraph_has_drawing(paragraph):
                print("【图片】")
                continue
            # Strip once and reuse the result for both the test and the output.
            text = paragraph.text.strip()
            if text:
                print(text)
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and return.
        print(f"处理Word文档时出错: {e}")
if __name__ == "__main__":
    # Example usage: set the path below to the Word document to inspect.
    word_document_path = "d:\\dsWork\\dsProject\\dsRag\\static\\Test\\带图的WORD文档_MATH_3.docx"
    # Report a missing file up front; otherwise dump the document's content.
    if not os.path.exists(word_document_path):
        print(f"文件不存在: {word_document_path}")
    else:
        read_word_content(word_document_path)