From 6ab546626dbbefffa64d4f162e0599fa140d5c9d Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Sat, 28 Jun 2025 10:36:03 +0800 Subject: [PATCH] 'commit' --- dsRag/Test/Test_MatchImage.py | 125 +++++++++++++++--- dsRag/Util/WordImageUtil.py | 10 +- .../__pycache__/WordImageUtil.cpython-310.pyc | Bin 1713 -> 1766 bytes dsRag/static/Test/1.txt | 2 +- dsRag/static/Test/2.txt | 2 +- ...g => 81572c98254043d4a475ab7381979a67.png} | Bin ...g => b536401321764b20b3c000c120ab3c5b.png} | Bin 7 files changed, 114 insertions(+), 25 deletions(-) rename dsRag/static/Test/{1.png => 81572c98254043d4a475ab7381979a67.png} (100%) rename dsRag/static/Test/{2.png => b536401321764b20b3c000c120ab3c5b.png} (100%) diff --git a/dsRag/Test/Test_MatchImage.py b/dsRag/Test/Test_MatchImage.py index 557d5e29..a05cbebe 100644 --- a/dsRag/Test/Test_MatchImage.py +++ b/dsRag/Test/Test_MatchImage.py @@ -1,9 +1,78 @@ -import os - +import re import docx -from Util.WordImageUtil import extract_images_from_docx - +import os +import shutil +import uuid +import zipfile + +from docx import Document +from docx.oxml.ns import nsmap + + +def extract_images_from_docx(docx_path, output_folder): + """ + 从docx提取图片并记录位置 + :param docx_path: Word文档路径 + :param output_folder: 图片输出文件夹 + :return: 包含图片路径和位置的列表 + """ + # 创建一个List 记录每个图片的名称和序号 + image_data = [] + # 创建临时解压目录 + temp_dir = os.path.join(output_folder, "temp_docx") + os.makedirs(temp_dir, exist_ok=True) + + # 解压docx文件 + with zipfile.ZipFile(docx_path, 'r') as zip_ref: + zip_ref.extractall(temp_dir) + + # 读取主文档关系 + with open(os.path.join(temp_dir, 'word', '_rels', 'document.xml.rels'), 'r') as rels_file: + rels_content = rels_file.read() + + # 加载主文档 + doc = Document(docx_path) + img_counter = 1 + + # 遍历所有段落 + for para_idx, paragraph in enumerate(doc.paragraphs): + for run_idx, run in enumerate(paragraph.runs): + # 检查运行中的图形 + for element in run._element: + if element.tag.endswith('drawing'): + # 提取图片关系ID + blip = element.find('.//a:blip', namespaces=nsmap) + if blip is not None: + embed_id = blip.get('{%s}embed' % nsmap['r']) + + # 从关系文件中获取图片文件名 + rel_entry = f' 记录每个图片的名称和序号 + image_data = [] # 创建临时解压目录 temp_dir = os.path.join(output_folder, "temp_docx") os.makedirs(temp_dir, exist_ok=True) @@ -30,8 +32,6 @@ def extract_images_from_docx(docx_path, output_folder): doc = Document(docx_path) img_counter = 1 - idx = 0 - # 遍历所有段落 for para_idx, paragraph in enumerate(doc.paragraphs): for run_idx, run in enumerate(paragraph.runs): @@ -58,8 +58,9 @@ def extract_images_from_docx(docx_path, output_folder): # 创建输出文件名 ext = os.path.splitext(src_path)[1] # 名称为uuid - idx = idx + 1 - img_name = f"{idx}{ext}" + fileName=uuid.uuid4().hex + img_name = f"{fileName}{ext}" + image_data.append(img_name) dest_path = os.path.join(output_folder, img_name) # 复制图片 shutil.copy(src_path, dest_path) @@ -68,3 +69,4 @@ def extract_images_from_docx(docx_path, output_folder): # 清理临时目录 shutil.rmtree(temp_dir) + return image_data diff --git a/dsRag/Util/__pycache__/WordImageUtil.cpython-310.pyc b/dsRag/Util/__pycache__/WordImageUtil.cpython-310.pyc index 895377862d01e07dd15aff546e0bf3a991dac0e6..c91992519f8544fdc99cae9e46c21d500ad5ec8d 100644 GIT binary patch delta 819 zcmZWn%Wl*#6tx{EllQ!5Izy=_s(@~aKm~Rzc&#dgRK#P{DvEGA4|N{RWMCH86lv6D zsj4iiqGZXEFF^c8U{{IH0PHCes)%)u&pp@Y+Hvy5`RTYr$1#z3oM&F=v32ZzTy2WX z67f*PIKv3F5hoGq;e-HYI3-*WnF7l3x9UCxnrNV;7^9bnsT5H(Q!pCnx6t_7JE6+) zT11rl+d^R)%P~ErjLyiB%~ennJw;sm&;R6t-vBhjJjV>KM<(QY)(k&F``brbgd5xx zlG(GRw0$5Hi`!uL9FcMEfYWo6JTp@qo(W!HR#as6A>l5#Or1#8O-jrWDs7+|N-@tf z7xE{Frg*IP&+}Vgf9R|ny?^m`@HjQ zz#=8Q5nW0ap$d(ZU<5id?jf%4?d*AbFGS_?`3C~BiB_vICqHl&x0EG;@0BgQ6n|Hq zmoszZhkiTsN1bs-!^vQr#SiF4gH9%0mZ=igGo=$uGtD23f&q&UY5jULGuZIW)EoIx zr{xSM(P$ERJHuTTge|k%_uGNT{K${L()(6Rmpgb}HqBy3T^76FR9`Lcba#WNem`jG z-G18}2qszv<7nRUGYZ$^PwHlKL;QLO>{y& zkTdejyh?F+SGZPj~Oi*P4C)K>lg DO~lmM delta 795 zcmY*W&ubGw6rP!#O=h>dlix{GMJ#w}m4XNHq6M)B6>k-*OC& z_n?k!2IC>d;4>v8ByAcqO@SuNFl3fyz`#0Ul!QFCH36QmW8YZXiZhy>e6Ngd>`_Y6 z@e(7}r*o$S@@JOG>%6w#5ZqWL(!Q!4`nMN*1coA^S>$Xv4aeQP@xt zLr{|YWBAV6Rw8cK@dIWPos~#dZivX(0je#=r_Zy!0!#i{xEE8 z^md@7qThTuKe4u9KM<)wBSq02dmhhOvtQJDnOhYv(8MqljJxkq>f zDI}GaN3Z^!<1S{wIpr+6pSeB~%%hTeOfdGA{bYaSyt;)I3y$(}Qr9s}_;9!ls(>)y z=@k`Ap_js?-hO9isXI^>O4CZUn4-&c6X`_U>-1o_(^^g+WFRpKA}EgNTPFJtEM>-` diff --git a/dsRag/static/Test/1.txt b/dsRag/static/Test/1.txt index 672fbbbf..7667506d 100644 --- a/dsRag/static/Test/1.txt +++ b/dsRag/static/Test/1.txt @@ -2,4 +2,4 @@ 在教学过程中,引导学生构建和理解模型,不仅能提升他们分析和解决问题的能力,还能激发他们发现问题和提出问题的意识。例如,在认识路程模型时,教师可通过生活化情境让学生理解速度的概念及其单位表示。 模型思想是《义务教育数学课程标准》中强调的核心素养之一,它帮助学生建立从现实世界抽象出数学问题的能力,并通过数学语言进行描述和解释。 因此,在“综合与实践”类教学内容中,应加强模型的应用训练,以培养学生应用数学知识解决实际问题的能力。 -【图片1】 \ No newline at end of file +81572c98254043d4a475ab7381979a67.png \ No newline at end of file diff --git a/dsRag/static/Test/2.txt b/dsRag/static/Test/2.txt index a2f20a7c..fcab777c 100644 --- a/dsRag/static/Test/2.txt +++ b/dsRag/static/Test/2.txt @@ -1,2 +1,2 @@ 我随便写点什么 -【图片2】 \ No newline at end of file +81572c98254043d4a475ab7381979a67.png \ No newline at end of file diff --git a/dsRag/static/Test/1.png b/dsRag/static/Test/81572c98254043d4a475ab7381979a67.png similarity index 100% rename from dsRag/static/Test/1.png rename to dsRag/static/Test/81572c98254043d4a475ab7381979a67.png diff --git a/dsRag/static/Test/2.png b/dsRag/static/Test/b536401321764b20b3c000c120ab3c5b.png similarity index 100% rename from dsRag/static/Test/2.png rename to dsRag/static/Test/b536401321764b20b3c000c120ab3c5b.png