From 268f1f4172844af410e2bee369b087a49fdfcaf2 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Tue, 19 Aug 2025 07:45:15 +0800 Subject: [PATCH] 'commit' --- .../Config/__pycache__/Config.cpython-310.pyc | Bin 843 -> 831 bytes .../ElasticSearch/T1_RebuildMapping.py | 2 +- dsSchoolBuddy/ElasticSearch/T2_BgeM3.py | 39 ++++ dsSchoolBuddy/ElasticSearch/T2_SplitTxt.py | 191 ------------------ dsSchoolBuddy/Test/G2_TeachingStudent.py | 69 +++++++ 5 files changed, 109 insertions(+), 192 deletions(-) create mode 100644 dsSchoolBuddy/ElasticSearch/T2_BgeM3.py delete mode 100644 dsSchoolBuddy/ElasticSearch/T2_SplitTxt.py create mode 100644 dsSchoolBuddy/Test/G2_TeachingStudent.py diff --git a/dsSchoolBuddy/Config/__pycache__/Config.cpython-310.pyc b/dsSchoolBuddy/Config/__pycache__/Config.cpython-310.pyc index 82eb2f5e7887007c2cbe63d681767ca4e295b3d2..e08dddfa2d8484bb71959dd5bcf8508175e52c86 100644 GIT binary patch literal 831 zcmaJ<&2HL25H|lmfh4M;^i-*8d*J}s5Ty`RRZVarE8s-Xs?93W%HG}Jg~c{r8?c|H zx1Rb2eHN~Li}unb{IsX;XlDF<_IxwDYqwl3N@A%$zS&Vql76nizbh-i2jP4ju!Ivh zDJ+H4Uo$|$8IZtPki`5{$M%nQU%Y>!`78h=5S(FGOLX3@_?_3 zmdPoCW*FG3(wzA8F@lKr;!T-z2D$An;KKBsRgpTFETLmgNqo>zRlRPFNOjtbn|xCB ztQobU@xt~;Q-|>>jz%6cZL$m=O~ghH!6b+*geK0?9kYnVFLoO+JzC?GQRF(0%w5!w zQ=cnTN=p)zUP@B@eBa6^zUfSeU!6DBZQuM?p}zS#el3QR6|6MPW}C?E*4dfdY&9DV zMQ#yyettXcRh#*N{7{3-`=`T1@QM#Y7Ak7iJ6MR6Q1Qqf%NG zv?08_HLmn)aIEz^S{L?jyIKz#YF`884Wr)a@J71_ha1>YhZ_7e=z+opclACv+19FG z;K$n=&45E4XrMIE2C8whkpxFuw68zi5xUXUjcd5;bENj}VXxQU6URGZU;1pv{GIl@ zx39b50~QF=xCd2UbM=;6Gs#>@;)JC Z_$v{T$z)<<+283xI`C$Nt!<)L>jv^C4a;TRacc(YwxCZ>)5H+ZrYr< zAt7<#0s`>>a6&u;FOe$(m5mh#XtL9}*)faNM7HaC3!O z@Pa9AFoQY3;Q$C=9`HB_f>;0o4&gA4Fp1*Ww-5+n5rpvyh~PMg;sjnj;z11G0U}-l zEBG#m<8_e0_f#&qF^NgpVk*N#GP7?uZYsEr<4xCOYGQ&KQofkoD&-1g5YZ4N5NneM zhUblFC7r%5tdz5vOp*r?Vj=38I@mZqD;p($+;zL>L)nz@X~8e;uxn9xq9sGqkl|2| z0+F>eb%=b=0P!>8nu80d6PINYlv_6DP)-! zzOB=g?hMmRN60fZi);=ZOwwo46qA?Bg?0LoU#1x)4SRCx3e!_XFosqbN*-VfBpUVc_Pb|bdT6Nu{~I=H;@KvhG`Q@ zl%Ym2HtnNZ(kbuUGuNJQ+~mIISRUiia&%My5>W?NPou-d7^ zez$Qme5D*r#YOh2Xnn7)Y*%5eulCwaIB%=6`B&AQYHt^;EO4XVtoJ~yrL^;VtYsaD z3s~* 记录每个图片的名称和序号 - image_data = [] - # 创建临时解压目录 - temp_dir = os.path.join(output_folder, "temp_docx") - os.makedirs(temp_dir, exist_ok=True) - - # 解压docx文件 - with zipfile.ZipFile(docx_path, 'r') as zip_ref: - zip_ref.extractall(temp_dir) - - # 读取主文档关系 - with open(os.path.join(temp_dir, 'word', '_rels', 'document.xml.rels'), 'r') as rels_file: - rels_content = rels_file.read() - - # 加载主文档 - doc = Document(docx_path) - img_counter = 1 - - # 遍历所有段落 - for para_idx, paragraph in enumerate(doc.paragraphs): - for run_idx, run in enumerate(paragraph.runs): - # 检查运行中的图形 - for element in run._element: - if element.tag.endswith('drawing'): - # 提取图片关系ID - blip = element.find('.//a:blip', namespaces=nsmap) - if blip is not None: - embed_id = blip.get('{%s}embed' % nsmap['r']) - - # 从关系文件中获取图片文件名 - rel_entry = f' 2 else line - elif line and line[0].isdigit(): - line = line[1:] - line = line.strip() - if in_block and line: # 只添加非空行 - current_block.append(line) - - if current_block: - blocks.append('\n'.join(current_block)) - - return [(i + 1, block) for i, block in enumerate(blocks)] - - -def save_to_txt(content, file_path, mode='w'): - """将内容保存到文本文件""" - try: - with open(file_path, mode, encoding='utf-8') as f: - f.write(content) - return True - except Exception as e: - print(f"保存文件{file_path}时出错: {str(e)}") - return False - - -class ImageReplacer: - def __init__(self, image_list): - self.image_list = image_list - self.current_idx = 0 - - def replace(self, match): - if self.current_idx < len(self.image_list): - result = f"![](./Images/{self.image_list[self.current_idx]})" - self.current_idx += 1 - return result - return match.group() - - -def process_document(docx_file, txt_output_dir, img_output_dir): - # 提取图片 - listImage = extract_images_from_docx(docx_file, img_output_dir) - print(f"图片数量为:{len(listImage)}") - - # 读取内容 - res = DocxUtil.get_docx_content_by_pandoc(docx_file) - # 分块 - chunks = split_into_blocks(res) - saved_count = 0 - - # 使用原来的正则表达式 - pattern = re.compile(r'【图片\d+】') - # 创建图片替换器 - replacer = ImageReplacer(listImage) - - for x in chunks: - firstLine = x[1].split("\n")[0].strip() - content = x[1][len(firstLine):].strip() - - # 使用类方法替换图片 - content = pattern.sub(replacer.replace, content) - # 保存文本文件 - # 从docx文件名提取学科和编号 - docx_name = os.path.basename(docx_file).split('.')[0] - subject_part = '_'.join(docx_name.split('_')[-2:]) # 获取最后两部分如CHINESE_1 - output_file = os.path.join(txt_output_dir, f"{subject_part}_{x[0]}.txt") - full_content = f"{firstLine}\n{content}" - if save_to_txt(full_content, output_file, mode='w'): - saved_count += 1 - - print(f"处理完成,共保存{saved_count}个文件到目录: {txt_output_dir}") - - -if __name__ == "__main__": - txt_output_dir = "../Txt/" - img_output_dir = "../static/Images/" - # 清空上面的两个输出目录,用os进行删除,在Windows环境中进行 - if os.path.exists(txt_output_dir): - shutil.rmtree(txt_output_dir) - if os.path.exists(img_output_dir): - shutil.rmtree(img_output_dir) - # 创建输出目录 - os.makedirs(txt_output_dir, exist_ok=True) - os.makedirs(img_output_dir, exist_ok=True) - - # 遍历static/Txt/下所有的docx - for filename in os.listdir("../static/Txt/"): - print("正在处理文件:" + filename) - # 这里需要文件的全称路径 - filename = os.path.join("../static/Txt/", filename) - process_document(filename, txt_output_dir, img_output_dir) diff --git a/dsSchoolBuddy/Test/G2_TeachingStudent.py b/dsSchoolBuddy/Test/G2_TeachingStudent.py new file mode 100644 index 00000000..8470248c --- /dev/null +++ b/dsSchoolBuddy/Test/G2_TeachingStudent.py @@ -0,0 +1,69 @@ +import sys + +from Util import LlmUtil + + +def get_system_prompt(): + """获取系统提示""" + return """ + 你是一位平易近人且教学方法灵活的教师,通过引导学生自主学习来帮助他们掌握知识。 + + 严格遵循以下教学规则: + 1. 首先了解学生情况:在开始讲解前,询问学生的年级水平和对勾股定理的了解程度。 + 2. 基于现有知识构建:将新思想与学生已有的知识联系起来。 + 3. 引导而非灌输:使用问题、提示和小步骤,让学生自己发现答案。 + 4. 检查和强化:在讲解难点后,确认学生能够重述或应用这些概念。 + 5. 变化节奏:混合讲解、提问和互动活动,让教学像对话而非讲座。 + + 最重要的是:不要直接给出答案,而是通过合作和基于学生已有知识的引导,帮助学生自己找到答案。 + """ + + +def initialize_chat_history(): + """初始化对话历史""" + # 包含系统提示作为第一条消息 + return [{ + "role": "system", + "content": get_system_prompt() + }] + + +if __name__ == "__main__": + # 初始化对话历史(包含系统提示) + chat_history = initialize_chat_history() + + # 欢迎消息 + print("教师助手已启动。输入 'exit' 或 '退出' 结束对话。") + print("你可以开始提问了,例如: '讲解一下勾股定理的证明'") + + # 多轮对话循环 + while True: + # 获取用户输入 + user_input = input("\n你: ") + + # 检查是否退出 + if user_input.lower() in ['exit', '退出']: + print("对话已结束。") + sys.exit(0) + + # 添加用户输入到对话历史 + chat_history.append({"role": "user", "content": user_input}) + + # 发送请求(传递用户输入文本和系统提示) + print("\n教师助手:") + try: + # 调用LlmUtil获取响应,传递用户输入文本和系统提示 + response_content = LlmUtil.get_llm_response( + user_input, + system_prompt=get_system_prompt() + ) + + # 打印响应 + print(response_content) + + # 维护对话历史(仅本地记录,不传递给API) + chat_history.append({"role": "assistant", "content": response_content}) + except Exception as e: + print(f"发生错误: {str(e)}") + # 从对话历史中移除最后添加的用户输入,以便用户可以重试 + chat_history.pop()