parent
96bf185757
commit
d37e464a5d
@ -0,0 +1,31 @@
|
|||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
from docx import Document
|
||||||
|
|
||||||
|
def extract_images_from_word(word_path, output_dir):
    """
    Extract the embedded images of a Word document into a directory.

    Each image is written under a freshly generated UUID file name so that
    repeated runs never overwrite earlier output. The file keeps the
    extension of the image part stored in the document (``.png``, ``.jpeg``,
    ``.gif``, ...) instead of being forced to ``.jpg``.

    :param word_path: path of the Word (.docx) document to read
    :param output_dir: directory the images are written to; created if missing
    :return: list of the paths of the saved images, in relationship order
    """
    doc = Document(word_path)

    # Make sure the output directory exists
    os.makedirs(output_dir, exist_ok=True)

    saved_paths = []
    for rel in doc.part.rels.values():
        # Linked (external) pictures have no part inside the package, so
        # there is nothing to extract and target_part must not be touched.
        if rel.is_external:
            continue
        # Select by relationship type rather than by the substring "image" in
        # the target path; endswith() covers both the transitional and the
        # strict OOXML relationship namespaces.
        if not rel.reltype.endswith("/image"):
            continue

        image_part = rel.target_part

        # Keep the real image extension; fall back to the historical ".jpg"
        # only when the part name carries none.
        extension = os.path.splitext(str(image_part.partname))[1] or ".jpg"

        # Name the image with a UUID
        output_path = os.path.join(output_dir, f"{uuid.uuid4()}{extension}")
        with open(output_path, "wb") as f:
            f.write(image_part.blob)
        print(f"图片已保存到: {output_path}")
        saved_paths.append(output_path)

    return saved_paths
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Sample document used when this module is run directly as a script.
    word_path = "d:\\dsWork\\dsProject\\dsRag\\Test\\带图的WORD文档.docx"

    # Images go to <parent of the document's folder>/static/Images.
    parent_of_doc_dir = os.path.join(os.path.dirname(word_path), "..")
    output_dir = os.path.abspath(
        os.path.join(parent_of_doc_dir, "static", "Images")
    )

    extract_images_from_word(word_path, output_dir)
|
Binary file not shown.
After Width: | Height: | Size: 47 KiB |
Loading…
Reference in new issue