diff --git a/dsRag/Test/Test_MatchImage.py b/dsRag/Test/Test_MatchImage.py index 557d5e29..a05cbebe 100644 --- a/dsRag/Test/Test_MatchImage.py +++ b/dsRag/Test/Test_MatchImage.py @@ -1,9 +1,78 @@ -import os - +import re import docx -from Util.WordImageUtil import extract_images_from_docx - +import os +import shutil +import uuid +import zipfile + +from docx import Document +from docx.oxml.ns import nsmap + + +def extract_images_from_docx(docx_path, output_folder): + """ + 从docx提取图片并记录位置 + :param docx_path: Word文档路径 + :param output_folder: 图片输出文件夹 + :return: 包含图片路径和位置的列表 + """ + # 创建一个List 记录每个图片的名称和序号 + image_data = [] + # 创建临时解压目录 + temp_dir = os.path.join(output_folder, "temp_docx") + os.makedirs(temp_dir, exist_ok=True) + + # 解压docx文件 + with zipfile.ZipFile(docx_path, 'r') as zip_ref: + zip_ref.extractall(temp_dir) + + # 读取主文档关系 + with open(os.path.join(temp_dir, 'word', '_rels', 'document.xml.rels'), 'r') as rels_file: + rels_content = rels_file.read() + + # 加载主文档 + doc = Document(docx_path) + img_counter = 1 + + # 遍历所有段落 + for para_idx, paragraph in enumerate(doc.paragraphs): + for run_idx, run in enumerate(paragraph.runs): + # 检查运行中的图形 + for element in run._element: + if element.tag.endswith('drawing'): + # 提取图片关系ID + blip = element.find('.//a:blip', namespaces=nsmap) + if blip is not None: + embed_id = blip.get('{%s}embed' % nsmap['r']) + + # 从关系文件中获取图片文件名 + rel_entry = f' 记录每个图片的名称和序号 + image_data = [] # 创建临时解压目录 temp_dir = os.path.join(output_folder, "temp_docx") os.makedirs(temp_dir, exist_ok=True) @@ -30,8 +32,6 @@ def extract_images_from_docx(docx_path, output_folder): doc = Document(docx_path) img_counter = 1 - idx = 0 - # 遍历所有段落 for para_idx, paragraph in enumerate(doc.paragraphs): for run_idx, run in enumerate(paragraph.runs): @@ -58,8 +58,9 @@ def extract_images_from_docx(docx_path, output_folder): # 创建输出文件名 ext = os.path.splitext(src_path)[1] # 名称为uuid - idx = idx + 1 - img_name = f"{idx}{ext}" + fileName=uuid.uuid4().hex + img_name = f"{fileName}{ext}" + image_data.append(img_name) dest_path = os.path.join(output_folder, img_name) # 复制图片 shutil.copy(src_path, dest_path) @@ -68,3 +69,4 @@ def extract_images_from_docx(docx_path, output_folder): # 清理临时目录 shutil.rmtree(temp_dir) + return image_data diff --git a/dsRag/Util/__pycache__/WordImageUtil.cpython-310.pyc b/dsRag/Util/__pycache__/WordImageUtil.cpython-310.pyc index 89537786..c9199251 100644 Binary files a/dsRag/Util/__pycache__/WordImageUtil.cpython-310.pyc and b/dsRag/Util/__pycache__/WordImageUtil.cpython-310.pyc differ diff --git a/dsRag/static/Test/1.txt b/dsRag/static/Test/1.txt index 672fbbbf..7667506d 100644 --- a/dsRag/static/Test/1.txt +++ b/dsRag/static/Test/1.txt @@ -2,4 +2,4 @@ 在教学过程中,引导学生构建和理解模型,不仅能提升他们分析和解决问题的能力,还能激发他们发现问题和提出问题的意识。例如,在认识路程模型时,教师可通过生活化情境让学生理解速度的概念及其单位表示。 模型思想是《义务教育数学课程标准》中强调的核心素养之一,它帮助学生建立从现实世界抽象出数学问题的能力,并通过数学语言进行描述和解释。 因此,在“综合与实践”类教学内容中,应加强模型的应用训练,以培养学生应用数学知识解决实际问题的能力。 -【图片1】 \ No newline at end of file +81572c98254043d4a475ab7381979a67.png \ No newline at end of file diff --git a/dsRag/static/Test/2.txt b/dsRag/static/Test/2.txt index a2f20a7c..fcab777c 100644 --- a/dsRag/static/Test/2.txt +++ b/dsRag/static/Test/2.txt @@ -1,2 +1,2 @@ 我随便写点什么 -【图片2】 \ No newline at end of file +81572c98254043d4a475ab7381979a67.png \ No newline at end of file diff --git a/dsRag/static/Test/1.png b/dsRag/static/Test/81572c98254043d4a475ab7381979a67.png similarity index 100% rename from dsRag/static/Test/1.png rename to dsRag/static/Test/81572c98254043d4a475ab7381979a67.png diff --git a/dsRag/static/Test/2.png b/dsRag/static/Test/b536401321764b20b3c000c120ab3c5b.png similarity index 100% rename from dsRag/static/Test/2.png rename to dsRag/static/Test/b536401321764b20b3c000c120ab3c5b.png