You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

35 lines
1.1 KiB

4 weeks ago
import docx
4 weeks ago
4 weeks ago
def read_word_content(docx_path):
    """Print each paragraph of a Word document as text or as an image marker.

    Paragraphs are visited in document order. A paragraph in which any run
    has a child element whose tag ends with ``drawing`` is reported as a
    numbered image marker; its own text, if any, is not printed. Every other
    paragraph is printed with surrounding whitespace stripped, and paragraphs
    that are empty after stripping are skipped.

    Args:
        docx_path: Path handed to ``docx.Document``.

    Returns:
        None. All output is written to stdout.

    Any exception raised while opening or walking the document is caught and
    reported on stdout instead of being propagated to the caller.
    """
    image_count = 0
    try:
        document = docx.Document(docx_path)
        for paragraph in document.paragraphs:
            # A run's underlying XML element is iterated for its children;
            # a child tag ending in 'drawing' marks the paragraph as an image.
            has_image = any(
                element.tag.endswith('drawing')
                for run in paragraph.runs
                for element in run._element
            )
            if has_image:
                image_count += 1
                # NOTE(review): the marker has no closing "】" -- the output
                # is kept exactly as before; confirm whether that is intended.
                print(f"【图片{image_count}")
                continue

            text = paragraph.text.strip()
            if text:
                print(text)
    except Exception as e:
        # Best-effort reader: report the failure rather than crash the caller.
        print(f"处理Word文档时出错: {e}")
4 weeks ago
4 weeks ago
if __name__ == "__main__":
    # Manual run against a sample document; the path is machine-specific.
    read_word_content(
        "d:\\dsWork\\dsProject\\dsRag\\static\\Test\\带图的WORD文档_MATH_3.docx"
    )