diff --git a/dsRag/494913b3598b4c73876bd7b821860973.html b/dsRag/494913b3598b4c73876bd7b821860973.html new file mode 100644 index 00000000..65f69b0b --- /dev/null +++ b/dsRag/494913b3598b4c73876bd7b821860973.html @@ -0,0 +1,79 @@ + + + + +
基于欧几里得几何的基本概念,针对小学阶段学生认知特点,通过直观体验和操作活动帮助学生建立对点、线、面、体、角的描述性理解,重点突破“角”的概念教学难点。
+ +教具演示:使用无刻度钟表模型,通过时针/分针位置变化让学生感知:
+概念 | +教学要点 | +常见误区 | +
---|---|---|
角 | +强调“两边所夹部分”,通过大小比较理解本质 | +避免使用射线定义,不强调边的无限性 | +
线 | +从直线段出发,延伸理解射线与直线 | +不过分强调“直”,但通过“两点间最短”感悟特性 | +
可结合《课标》例46(几何概念计数)、例58(立体图形抽象)开展跨课时整合教学。
+ diff --git a/dsRag/Doc/9、Pandoc下载.md b/dsRag/Doc/9、Pandoc下载.md new file mode 100644 index 00000000..674e9ba8 --- /dev/null +++ b/dsRag/Doc/9、Pandoc下载.md @@ -0,0 +1,2 @@ +https://github.com/jgm/pandoc/releases/tag/3.7.0.2 +https://objects.githubusercontent.com/github-production-release-asset-2e65be/571770/92882bf5-3b76-4345-b08a-9d9badc74957?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20250626%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250626T231242Z&X-Amz-Expires=1800&X-Amz-Signature=55493529bc6e5a3779e95bcdd9f33cf09477d3e47f9a441b9412b5b193d788db&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dpandoc-3.7.0.2-windows-x86_64.msi&response-content-type=application%2Foctet-stream \ No newline at end of file diff --git a/dsRag/Start.py b/dsRag/Start.py index 7c5ed95b..3daf0a20 100644 --- a/dsRag/Start.py +++ b/dsRag/Start.py @@ -1,9 +1,11 @@ +import os +import tempfile import urllib.parse +import uuid from contextlib import asynccontextmanager from io import BytesIO from logging.handlers import RotatingFileHandler -import html2text import jieba # 导入 jieba 分词库 import uvicorn from docx import Document @@ -20,6 +22,13 @@ from Config.Config import MS_MODEL_PATH, MS_MODEL_LIMIT, MS_HOST, MS_PORT, MS_MA from Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager from Milvus.Utils.MilvusConnectionPool import * from Milvus.Utils.MilvusConnectionPool import MilvusConnectionPool +import subprocess + + +# 将HTML文件转换为Word文件 +def html_to_word_pandoc(html_file, output_file): + subprocess.run(['pandoc', html_file, '-o', output_file]) + # 初始化日志 logger = logging.getLogger(__name__) @@ -75,10 +84,8 @@ def text_to_embedding(text): async def generate_stream(client, milvus_pool, collection_manager, query): - """生成SSE流""" # 从连接池获取连接 connection = milvus_pool.get_connection() - try: # 1. 
将查询文本转换为向量 current_embedding = text_to_embedding(query) @@ -89,7 +96,7 @@ async def generate_stream(client, milvus_pool, collection_manager, query): "params": {"nprobe": MS_NPROBE} # 设置 IVF_FLAT 的 nprobe 参数 } # 7. 将文本转换为嵌入向量 - results = collection_manager.search(current_embedding, search_params, limit=5) # 返回 2 条结果 + results = collection_manager.search(current_embedding, search_params, limit=5) # 返回 5 条结果 # 3. 处理搜索结果 logger.info("最相关的知识库内容:") @@ -100,7 +107,7 @@ async def generate_stream(client, milvus_pool, collection_manager, query): try: # 查询非向量字段 record = collection_manager.query_by_id(hit.id) - if hit.distance < 0.88: # 设置距离阈值 + if hit.distance < 0.88: # 设置距离阈值 logger.info(f"ID: {hit.id}") logger.info(f"标签: {record['tags']}") logger.info(f"用户问题: {record['user_input']}") @@ -144,8 +151,11 @@ async def generate_stream(client, milvus_pool, collection_manager, query): temperature=0.3, stream=False ) - - yield {"data": response.choices[0].message.content} + # 将返回的html代码保存成文件 + htmlStr = response.choices[0].message.content + with open("Static/1.html", "w", encoding="utf-8") as f: + f.write(htmlStr) + yield {"data": htmlStr} except Exception as e: yield {"data": f"生成报告时出错: {str(e)}"} finally: @@ -168,11 +178,15 @@ http://10.10.21.22:8000/static/ai.html class QueryRequest(BaseModel): query: str = Field(..., description="用户查询的问题") + class SaveWordRequest(BaseModel): html: str = Field(..., description="要保存为Word的HTML内容") + @app.post("/api/save-word") async def save_to_word(request: Request): + temp_html = None + output_file = None try: # Parse request data try: @@ -183,52 +197,42 @@ async def save_to_word(request: Request): except Exception as e: logger.error(f"Request parsing failed: {str(e)}") raise HTTPException(status_code=400, detail=f"Invalid request: {str(e)}") - - # Convert HTML to text - try: - text_maker = html2text.HTML2Text() - text_maker.ignore_links = True - text_maker.ignore_images = True - text_content = text_maker.handle(html_content) - except Exception 
as e: - logger.error(f"HTML conversion failed: {str(e)}") - raise HTTPException(status_code=400, detail=f"HTML processing error: {str(e)}") - - # Create Word document - try: - doc = Document() - doc.add_heading('小学数学问答', 0) - - for para in text_content.split('\n\n'): - if para.strip(): - doc.add_paragraph(para.strip()) - except Exception as e: - logger.error(f"Document creation failed: {str(e)}") - raise HTTPException(status_code=500, detail=f"Document creation error: {str(e)}") - - # Save to stream - try: - stream = BytesIO() - doc.save(stream) - stream.seek(0) - except Exception as e: - logger.error(f"Document saving failed: {str(e)}") - raise HTTPException(status_code=500, detail=f"Document saving error: {str(e)}") - - # Return response - filename = "小学数学问答.docx" - encoded_filename = urllib.parse.quote(filename) + + # 创建临时HTML文件 + temp_html = os.path.join(tempfile.gettempdir(), uuid.uuid4().hex + ".html") + with open(temp_html, "w", encoding="utf-8") as f: + f.write(html_content) + + # 使用pandoc转换 + output_file = os.path.join(tempfile.gettempdir(), "小学数学问答.docx") + subprocess.run(['pandoc', temp_html, '-o', output_file], check=True) + + # 读取生成的Word文件 + with open(output_file, "rb") as f: + stream = BytesIO(f.read()) + + # 返回响应 + encoded_filename = urllib.parse.quote("小学数学问答.docx") return StreamingResponse( stream, media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document", - headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"} - ) - + headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"}) + except HTTPException: raise except Exception as e: logger.error(f"Unexpected error: {str(e)}") raise HTTPException(status_code=500, detail="Internal server error") + finally: + # 清理临时文件 + try: + if temp_html and os.path.exists(temp_html): + os.remove(temp_html) + if output_file and os.path.exists(output_file): + os.remove(output_file) + except Exception as e: + logger.warning(f"Failed to 
clean up temp files: {str(e)}") + @app.post("/api/rag") async def rag_stream(request: Request): @@ -243,10 +247,10 @@ async def rag_stream(request: Request): raise HTTPException(status_code=400, detail="无效的请求格式") """RAG+DeepSeek接口""" async for chunk in generate_stream( - request.app.state.deepseek_client, - request.app.state.milvus_pool, - request.app.state.collection_manager, - query_request.query + request.app.state.deepseek_client, + request.app.state.milvus_pool, + request.app.state.collection_manager, + query_request.query ): return chunk diff --git a/dsRag/Test/TestPandoc.py b/dsRag/Test/TestPandoc.py new file mode 100644 index 00000000..e004310c --- /dev/null +++ b/dsRag/Test/TestPandoc.py @@ -0,0 +1,7 @@ +import subprocess + +def html_to_word_pandoc(html_file, output_file): + subprocess.run(['pandoc', html_file, '-o', output_file]) + +# 使用示例 +html_to_word_pandoc('../static/1.html', '../static/output.docx') \ No newline at end of file diff --git a/dsRag/static/1.html b/dsRag/static/1.html new file mode 100644 index 00000000..cf78316b --- /dev/null +++ b/dsRag/static/1.html @@ -0,0 +1,42 @@ + + + +在小学数学中,模型是用数学的语言讲述现实世界中的故事,强调如何用数学的方法描述或解决一类现实生活中的问题。模型不仅仅是数学表达,而是能够解决一类具有实际背景问题的数学方法。
+ +讨论总量与部分量之间的关系,部分量之间是并列关系,运算用加法。基本形式为:
+总量 = 部分量 + 部分量
+适用于解决图书室各类书的总和、购物总花费等问题。
+描述距离、速度、时间之间的关系,基本形式为:
+距离 = 速度 × 时间
+同样适用于“总价 = 单价 × 数量”、“总数 = 行数 × 列数”等结构相同的问题。
+问题背景是在直线或平面上有规律地挖洞植树,适用于解决资源调查、环境调查等问题。
+讨论多个工程队合作完成一项工程所需的时间,基本思路是把工程总量假设为1,适用于解决归一问题、注水问题等。
+模型的重要性不仅取决于数学表达是否完美,更取决于对现实世界的解释。通过模型的构建和理解,学生可以认识到数学是描述现实世界的强有力工具。
+ +在教学中,应通过现实例子让学生感悟模型的意义,引导学生灵活使用模型,培养学生的应用意识和创新意识。
+ + \ No newline at end of file diff --git a/dsRag/static/output.docx b/dsRag/static/output.docx new file mode 100644 index 00000000..bfa0714f Binary files /dev/null and b/dsRag/static/output.docx differ diff --git a/dsRag/小学数学问答.docx b/dsRag/小学数学问答.docx new file mode 100644 index 00000000..adcc0e10 Binary files /dev/null and b/dsRag/小学数学问答.docx differ