parent
96bf185757
commit
d37e464a5d
@ -0,0 +1,31 @@
|
||||
import os
|
||||
import uuid
|
||||
from docx import Document
|
||||
|
||||
def extract_images_from_word(word_path, output_dir):
    """
    Extract the images embedded in a Word document and save them to a directory.

    Only the relationships of the main document part (``doc.part.rels``) are
    scanned. Each image is written under a fresh UUID-based file name that
    keeps the extension of the image part inside the package, falling back
    to ``.jpg`` when the part name carries no extension.

    :param word_path: path of the Word (.docx) document to read
    :param output_dir: directory the images are written to; created if missing
    :return: list of the file paths that were written, in relationship order
    """
    doc = Document(word_path)

    # Make sure the output directory exists.
    os.makedirs(output_dir, exist_ok=True)

    saved_paths = []
    for rel in doc.part.rels.values():
        # Select by relationship type instead of looking for "image" in the
        # target path: a hyperlink URL may contain "image", and an embedded
        # picture's part name is not required to.
        if not rel.reltype.endswith("/image"):
            continue
        # A linked (external) image has no part inside the package;
        # python-docx raises when target_part is read on such a relationship.
        if rel.is_external:
            continue

        image_part = rel.target_part

        # Keep the real format's extension (.png, .gif, ...) rather than
        # labelling every image as JPEG.
        extension = os.path.splitext(str(image_part.partname))[1] or ".jpg"

        # Name the image with a UUID so repeated runs never collide.
        output_path = os.path.join(output_dir, f"{uuid.uuid4()}{extension}")
        with open(output_path, "wb") as f:
            f.write(image_part.blob)
        print(f"图片已保存到: {output_path}")
        saved_paths.append(output_path)

    return saved_paths
|
||||
|
||||
if __name__ == "__main__":
    # Sample document used when this file is run directly as a script.
    word_path = "d:\\dsWork\\dsProject\\dsRag\\Test\\带图的WORD文档.docx"

    # Images are written to <parent of the document's folder>/static/Images.
    document_dir = os.path.dirname(word_path)
    output_dir = os.path.abspath(
        os.path.join(document_dir, "..", "static", "Images")
    )

    extract_images_from_word(word_path, output_dir)
|
Binary file not shown.
After Width: | Height: | Size: 47 KiB |
Loading…
Reference in new issue