parent
9157880400
commit
ad174680e6
@ -0,0 +1,99 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
from Util.SplitDocxUtil import SplitDocxUtil
|
||||||
|
|
||||||
|
|
||||||
|
def split_into_blocks(text):
    """Split text into numbered blocks at '问题N' / '话题N' heading lines.

    A line is a heading when it starts with '问题' or '话题' and contains a
    digit within its first five characters.  Each heading opens a new block;
    the heading's own prefix and number are stripped and whatever remains of
    the line becomes the first line of that block.  Lines that precede the
    first heading, and blank lines, are dropped.

    Args:
        text: The full document text.

    Returns:
        A list of ``(index, block_text)`` tuples, with ``index`` counting from
        1 in order of appearance and ``block_text`` joined with newlines.
        Headings that end up with no content at all produce no block.
    """
    prefixes = ('问题', '话题')
    blocks = []
    current_block = []
    in_block = False

    for line in text.splitlines():
        if line.startswith(prefixes) and any(c.isdigit() for c in line[:5]):
            # A new heading closes the block collected so far.  Empty blocks
            # are skipped, matching the check applied to the final block.
            if in_block and current_block:
                blocks.append('\n'.join(current_block))
            current_block = []
            in_block = True

            # Peel the heading prefix and its number off the front.  The
            # slices are unconditional: a heading that is nothing but
            # "问题1" must shrink to '' so the loop ends (guarding the slice
            # with a length check left a lone digit in place forever).
            # NOTE: digits that open the title itself are stripped as well.
            while line:
                if line.startswith(prefixes):
                    line = line[2:]
                elif line[0].isdigit():
                    line = line[1:]
                else:
                    break
                line = line.strip()

        if in_block and line:  # keep non-empty lines only
            current_block.append(line)

    if current_block:
        blocks.append('\n'.join(current_block))

    return [(i + 1, block) for i, block in enumerate(blocks)]
|
||||||
|
|
||||||
|
def process_document(input_path, output_dir):
    """Split one .docx file into numbered .txt files, one per block.

    The document text is read with ``SplitDocxUtil.read_docx``, split with
    ``split_into_blocks`` and every block is written through ``save_to_txt``
    to ``<output_dir>/<prefix>_<n>.txt``.  ``prefix`` is built from the last
    two underscore-separated parts of the file name (the text before its
    first dot), e.g. ``xxx_MATH_1.docx`` -> ``MATH_1``.  A name with no
    underscore is used as the prefix unchanged.

    Args:
        input_path: Path of the .docx file to read.
        output_dir: Directory that receives the .txt files.  It is neither
            created nor emptied here; ``process_directory`` does that.

    Returns:
        True if at least one block file was saved, otherwise False.
    """
    text = SplitDocxUtil.read_docx(input_path)
    if not text:
        print("无法读取输入文件内容")
        return False

    chunks = split_into_blocks(text)
    print(f"共分割出{len(chunks)}个段落块")

    # Keep at most the last two '_' parts; slicing (rather than indexing
    # [-2]) avoids an IndexError for names that contain no underscore.
    stem = os.path.basename(input_path).split('.')[0]
    file_prefix = '_'.join(stem.split('_')[-2:])

    saved_count = 0
    for chunk_num, chunk in chunks:
        chunk = chunk.strip()  # make sure no surrounding whitespace is written
        output_file = os.path.join(output_dir, f"{file_prefix}_{chunk_num}.txt")
        if save_to_txt(chunk, output_file, mode='w'):
            saved_count += 1

    print(f"处理完成,共保存{saved_count}个文件到目录: {output_dir}")
    return saved_count > 0
|
||||||
|
|
||||||
|
# 保留原有的save_to_txt函数
|
||||||
|
def save_to_txt(content, file_path, mode='w'):
    """Write ``content`` to ``file_path`` as UTF-8 text.

    Args:
        content: The text to write.
        file_path: Destination file path.
        mode: Mode string handed to ``open`` ('w' overwrites, 'a' appends).

    Returns:
        True when the write succeeded; False when any exception occurred,
        in which case the error is printed instead of being raised.
    """
    try:
        with open(file_path, mode, encoding='utf-8') as handle:
            handle.write(content)
    except Exception as e:
        print(f"保存文件{file_path}时出错: {str(e)}")
        return False
    else:
        return True
|
||||||
|
|
||||||
|
def process_directory(input_dir, output_dir):
    """Split every .docx file found directly inside ``input_dir``.

    The output directory is emptied once up front (files and symlinks only;
    sub-directories are left in place) and created when missing.  Each .docx
    file is then passed to ``process_document`` in sorted name order.

    Args:
        input_dir: Directory that is scanned (non-recursively) for .docx files.
        output_dir: Directory receiving the generated .txt files.  Existing
            files in it are deleted.

    Returns:
        True if at least one document was processed successfully; False if
        the input directory is missing, holds no .docx file, or every
        document failed.
    """
    # isdir rather than exists: a plain file passed here would otherwise
    # blow up in os.listdir further down.
    if not os.path.isdir(input_dir):
        print(f"输入目录不存在: {input_dir}")
        return False

    # Empty the output directory once.  os.remove cannot delete directories,
    # so anything that is not a file or a symlink is skipped, not crashed on.
    if os.path.isdir(output_dir):
        for name in os.listdir(output_dir):
            stale_path = os.path.join(output_dir, name)
            if os.path.isfile(stale_path) or os.path.islink(stale_path):
                os.remove(stale_path)
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so the processing order does not depend on the file system.
    docx_files = sorted(
        f for f in os.listdir(input_dir) if f.lower().endswith('.docx')
    )
    if not docx_files:
        print(f"目录中没有找到docx文件: {input_dir}")
        return False

    success_count = 0
    for docx_file in docx_files:
        input_path = os.path.join(input_dir, docx_file)
        print(f"正在处理文件: {docx_file}")
        if process_document(input_path, output_dir):
            success_count += 1

    print(f"处理完成,共处理{success_count}/{len(docx_files)}个文件")
    return success_count > 0
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: split every .docx under ../static/Txt into ../Txt.
    input_dir, output_dir = '../static/Txt', '../Txt'
    process_directory(input_dir, output_dir)
|
@ -1,44 +0,0 @@
|
|||||||
from bs4 import BeautifulSoup
|
|
||||||
from docx import Document
|
|
||||||
"""
|
|
||||||
pip install python-docx html2text beautifulsoup4
|
|
||||||
"""
|
|
||||||
|
|
||||||
def html_to_word(html_content, word_path):
    """Write the block-level text of an HTML string into a Word document.

    Every ``p``, ``h1``-``h6`` and ``div`` element that carries text becomes
    one plain paragraph, in document order.  Each piece of text is attributed
    only to its nearest enclosing block element, so text inside nested blocks
    (for example a ``p`` inside a ``div``) is written once rather than once
    per ancestor.

    Args:
        html_content: The HTML markup to convert.
        word_path: Path the .docx file is saved to.
    """
    block_tags = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div']

    # Parse the HTML
    soup = BeautifulSoup(html_content, 'html.parser')

    # Create the Word document
    doc = Document()

    for element in soup.find_all(block_tags):
        # Collect only the strings whose closest block ancestor is this
        # element; strings owned by a nested block are emitted when that
        # nested element is visited.  Pieces are stripped and joined with no
        # separator, as get_text(strip=True) does.
        pieces = []
        for string in element.strings:
            if string.find_parent(block_tags) is not element:
                continue
            piece = string.strip()
            if piece:
                pieces.append(piece)

        text = ''.join(pieces)
        if text:
            doc.add_paragraph(text)

    # Save the Word document
    doc.save(word_path)
    print(f"HTML content saved to {word_path}")
|
|
||||||
|
|
||||||
|
|
||||||
# Sample HTML used to demonstrate the conversion
html_content = """
<!DOCTYPE html>
<html>
<head>
<title>Sample HTML</title>
</head>
<body>
<h1>Heading 1</h1>
<p>This is a paragraph.</p>
<h2>Heading 2</h2>
<div>Content inside a div.</div>
</body>
</html>
"""

# Run the conversion; the document is saved as output.docx
html_to_word(html_content=html_content, word_path="output.docx")
|
|
Binary file not shown.
@ -1,4 +0,0 @@
|
|||||||
问题1 教学建议与意义
|
|
||||||
在教学过程中,引导学生构建和理解模型,不仅能提升他们分析和解决问题的能力,还能激发他们发现问题和提出问题的意识。例如,在认识路程模型时,教师可通过生活化情境让学生理解速度的概念及其单位表示。
|
|
||||||
模型思想是《义务教育数学课程标准》中强调的核心素养之一,它帮助学生建立从现实世界抽象出数学问题的能力,并通过数学语言进行描述和解释。
|
|
||||||
因此,在“综合与实践”类教学内容中,应加强模型的应用训练,以培养学生应用数学知识解决实际问题的能力。
|
|
@ -1 +0,0 @@
|
|||||||
问题2 我随便写点什么
|
|
Loading…
Reference in new issue