|
|
|
@ -2,11 +2,13 @@ import os
|
|
|
|
|
import docx
|
|
|
|
|
from docx.oxml.ns import nsmap
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def read_word_content(docx_path):
|
|
|
|
|
idx = 0
|
|
|
|
|
"""遍历Word文档的每个段落,输出文字或图片标识"""
|
|
|
|
|
try:
|
|
|
|
|
doc = docx.Document(docx_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for paragraph in doc.paragraphs:
|
|
|
|
|
has_image = False
|
|
|
|
|
# 检查段落中是否有图片
|
|
|
|
@ -18,15 +20,17 @@ def read_word_content(docx_path):
|
|
|
|
|
break
|
|
|
|
|
if has_image:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if has_image:
|
|
|
|
|
print("【图片】")
|
|
|
|
|
idx = idx + 1
|
|
|
|
|
print("【图片" + str(idx) + "】")
|
|
|
|
|
elif paragraph.text.strip():
|
|
|
|
|
print(paragraph.text.strip())
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
print(f"处理Word文档时出错: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
# 示例用法
|
|
|
|
|
# 请将 'your_document.docx' 替换为你的Word文档路径
|
|
|
|
@ -34,4 +38,4 @@ if __name__ == "__main__":
|
|
|
|
|
if os.path.exists(word_document_path):
|
|
|
|
|
read_word_content(word_document_path)
|
|
|
|
|
else:
|
|
|
|
|
print(f"文件不存在: {word_document_path}")
|
|
|
|
|
print(f"文件不存在: {word_document_path}")
|
|
|
|
|