@ -1,31 +1,99 @@
|
||||
import os
|
||||
import uuid
|
||||
# Sample input document (absolute path on drive d:); the __main__ section
# further down assigns this same value again before running the extractors.
word_path = "d:\\dsWork\\dsProject\\dsRag\\Test\\带图的WORD文档.docx"
|
||||
|
||||
from docx import Document
|
||||
from docx.oxml import parse_xml
|
||||
from docx.oxml.ns import nsmap
|
||||
import os
|
||||
import zipfile
|
||||
import shutil
|
||||
|
||||
def extract_images_from_word(word_path, output_dir):
    """Save every embedded image of a Word document under a UUID-based name.

    :param word_path: path of the Word (.docx) document
    :param output_dir: directory that receives the images (created if missing)
    """
    doc = Document(word_path)

    # Make sure the output directory exists.
    os.makedirs(output_dir, exist_ok=True)

    # Walk every relationship of the main document part.
    for rel in doc.part.rels.values():
        # External (linked) relationships have no target part to read bytes
        # from; accessing ``target_part`` on them raises, so skip them.
        if rel.is_external or "image" not in rel.target_ref:
            continue

        image_part = rel.target_part

        # Keep the image's real extension instead of labelling everything
        # ".jpg"; fall back to ".jpg" only when the part name has none.
        ext = os.path.splitext(str(image_part.partname))[1] or ".jpg"

        # Name the image with a UUID.
        output_path = os.path.join(output_dir, f"{uuid.uuid4()}{ext}")
        with open(output_path, "wb") as f:
            f.write(image_part.blob)
        print(f"图片已保存到: {output_path}")


def extract_images_from_docx(docx_path, output_folder):
    """Extract images from a .docx file and record where each one occurs.

    Images are located by walking paragraphs -> runs -> ``drawing`` elements
    and are written to *output_folder* as ``image_<n><ext>``, numbered from 1
    in document order.  Image bytes are resolved through the document part's
    own relationship map, so the archive is never unpacked to disk and the
    relationship XML is never parsed by hand.

    :param docx_path: path of the Word (.docx) document
    :param output_folder: folder that receives the images (created if missing)
    :return: list of dicts ``{"image_path": str, "location": dict}`` where
        ``location`` holds ``paragraph_index``, ``run_index``,
        ``page_number`` (always ``None``) and ``paragraph_text``
    """
    os.makedirs(output_folder, exist_ok=True)

    # Load the main document.
    doc = Document(docx_path)
    related_parts = doc.part.related_parts  # rId -> target part
    embed_attr = '{%s}embed' % nsmap['r']

    image_data = []
    img_counter = 1

    # Walk all paragraphs.
    for para_idx, paragraph in enumerate(doc.paragraphs):
        for run_idx, run in enumerate(paragraph.runs):
            # Look for drawings inside the run.
            for element in run._element:
                if not element.tag.endswith('drawing'):
                    continue

                # Pull the relationship id of the picture.
                blip = element.find('.//a:blip', namespaces=nsmap)
                if blip is None:
                    continue

                # Linked pictures carry no r:embed id (lookup key is None)
                # and unknown ids are skipped the same way.
                image_part = related_parts.get(blip.get(embed_attr))
                if image_part is None:
                    continue

                # Build the output file name from the part's extension.
                ext = os.path.splitext(str(image_part.partname))[1]
                img_name = f"image_{img_counter}{ext}"
                dest_path = os.path.join(output_folder, img_name)

                # Write the image bytes.
                with open(dest_path, "wb") as out_file:
                    out_file.write(image_part.blob)

                # Only mark the context as truncated when it really was.
                text = paragraph.text
                snippet = text if len(text) <= 50 else text[:50] + "..."

                # Record the position information.
                location = {
                    "paragraph_index": para_idx,
                    "run_index": run_idx,
                    "page_number": None,  # docx does not store page numbers
                    "paragraph_text": snippet,
                }

                image_data.append({
                    "image_path": dest_path,
                    "location": location,
                })

                img_counter += 1

    return image_data
|
||||
|
||||
|
||||
# 使用示例
|
||||
if __name__ == "__main__":
    word_path = "d:\\dsWork\\dsProject\\dsRag\\Test\\带图的WORD文档.docx"

    # First run: dump the images into <document dir>/../static/Images.
    doc_dir = os.path.dirname(word_path)
    output_dir = os.path.abspath(os.path.join(doc_dir, "..", "static", "Images"))
    extract_images_from_word(word_path, output_dir)

    # Second run: extract into a local folder and collect position records.
    output_dir = "extracted_images"  # image output directory
    os.makedirs(output_dir, exist_ok=True)
    images = extract_images_from_docx(word_path, output_dir)

    # Print the results.
    for record in images:
        position = record['location']
        print(f"图片保存至: {record['image_path']}")
        print(f"位置信息: 段落 {position['paragraph_index']}, 运行 {position['run_index']}")
        print(f"上下文: {position['paragraph_text']}\n")
|
||||
|
@ -1,21 +0,0 @@
|
||||
from nltk.corpus import wordnet
|
||||
import jieba
|
||||
|
||||
def _wordnet_synonyms(word):
    """Return the set of Mandarin ('cmn') WordNet lemma names for *word*."""
    return {
        lemma
        for syn in wordnet.synsets(word, lang='cmn')
        for lemma in syn.lemma_names('cmn')
    }


def expand_with_synonyms(query, *, tokenizer=None, synonym_lookup=None):
    """Expand each token of *query* into an alternation of its synonyms.

    A token with synonyms becomes ``(token|syn1|syn2)``; a token without any
    is kept as is.  Tokens are joined with single spaces.

    The original token is always listed first and the synonyms follow in
    sorted order, so the result is reproducible between runs (joining a set
    directly depends on string hash randomisation) and the user's own term
    is never dropped from the query.

    :param query: query text to expand
    :param tokenizer: callable ``str -> iterable of tokens``; defaults to
        ``jieba.lcut``
    :param synonym_lookup: callable ``token -> iterable of synonyms``;
        defaults to the Mandarin WordNet lemma lookup
    :return: the expanded query string
    """
    if tokenizer is None:
        tokenizer = jieba.lcut
    if synonym_lookup is None:
        synonym_lookup = _wordnet_synonyms

    expanded = []
    for word in tokenizer(query):
        # The token itself is emitted first, so drop it from the synonyms.
        others = sorted(set(synonym_lookup(word)) - {word})
        if others:
            expanded.append(f"({'|'.join([word, *others])})")
        else:
            expanded.append(word)
    return ' '.join(expanded)
|
||||
|
||||
# Demo: expand a sample query and show it next to the original.
original_query = "微积分的基本定理是什么?"
expanded_query = expand_with_synonyms(original_query)
print(f"原始查询: {original_query}", f"扩展后查询: {expanded_query}", sep="\n")
|
Before Width: | Height: | Size: 47 KiB After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 120 KiB |
After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 120 KiB |
@ -0,0 +1,4 @@
|
||||
问题1 教学建议与意义
|
||||
在教学过程中,引导学生构建和理解模型,不仅能提升他们分析和解决问题的能力,还能激发他们发现问题和提出问题的意识。例如,在认识路程模型时,教师可通过生活化情境让学生理解速度的概念及其单位表示。
|
||||
模型思想是《义务教育数学课程标准》中强调的核心素养之一,它帮助学生建立从现实世界抽象出数学问题的能力,并通过数学语言进行描述和解释。
|
||||
因此,在“综合与实践”类教学内容中,应加强模型的应用训练,以培养学生应用数学知识解决实际问题的能力。
|
@ -0,0 +1 @@
|
||||
问题2 我随便写点什么
|
After Width: | Height: | Size: 47 KiB |
After Width: | Height: | Size: 120 KiB |
After Width: | Height: | Size: 120 KiB |
After Width: | Height: | Size: 47 KiB |