from io import BytesIO

import html2text
from docx import Document
from docx.shared import Inches

# Name offered to the browser for the generated file, plus a pure-ASCII
# stand-in for clients that ignore the RFC 5987 ``filename*`` parameter.
WORD_FILENAME = "小学数学问答.docx"
WORD_FILENAME_ASCII = "math_qa.docx"
WORD_MEDIA_TYPE = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)


class SaveWordRequest(BaseModel):
    """Request body of ``POST /api/save-word``."""

    # HTML fragment that should be exported as a Word document.
    html: str = Field(..., description="要保存为Word的HTML内容")


def _content_disposition(filename, ascii_fallback):
    """Build an ``attachment`` Content-Disposition value for any filename.

    HTTP header values have to be latin-1 encodable, so a non-ASCII name
    cannot be placed in ``filename=`` directly. The real name is sent
    percent-encoded through ``filename*`` (RFC 5987 / RFC 6266) and
    ``ascii_fallback`` is sent through the plain ``filename`` parameter.

    Args:
        filename: Desired download name; may contain any Unicode text.
        ascii_fallback: ASCII-only name for clients without ``filename*``.

    Returns:
        The header value as an ASCII-only string.
    """
    from urllib.parse import quote

    return (
        f'attachment; filename="{ascii_fallback}"; '
        f"filename*=UTF-8''{quote(filename, safe='')}"
    )


@app.post("/api/save-word")
async def save_to_word(request: Request):
    """Convert the posted HTML to a .docx file and return it as a download.

    The JSON body must match ``SaveWordRequest``. The HTML is flattened to
    text with html2text and written below a fixed title heading.

    Raises:
        HTTPException: 422 when the body fails model validation, 400 when
            the body cannot be parsed at all.
    """
    try:
        data = await request.json()
        save_request = SaveWordRequest(**data)
    except ValidationError as e:
        logger.error(f"请求体验证失败: {e.errors()}")
        raise HTTPException(status_code=422, detail=e.errors()) from e
    except Exception as e:
        logger.error(f"请求解析失败: {str(e)}")
        raise HTTPException(status_code=400, detail="无效的请求格式") from e

    # Flatten the HTML to text.
    h = html2text.HTML2Text()
    h.ignore_links = True
    # 0 turns off html2text's default hard wrapping; wrapped lines would
    # otherwise show up as line breaks in the middle of Word paragraphs.
    h.body_width = 0
    plain_text = h.handle(save_request.html)

    # Build the Word document.
    doc = Document()
    doc.add_heading('小学数学问答', 0)
    doc.add_paragraph(plain_text)

    # Serialize into an in-memory buffer so nothing is written to disk.
    file_stream = BytesIO()
    doc.save(file_stream)
    file_stream.seek(0)

    return StreamingResponse(
        file_stream,
        media_type=WORD_MEDIA_TYPE,
        headers={
            # The raw Chinese filename is not latin-1 encodable and would
            # make the response fail while its headers are being built.
            "Content-Disposition": _content_disposition(
                WORD_FILENAME, WORD_FILENAME_ASCII
            )
        },
    )
# Requires: pip install python-docx html2text beautifulsoup4
from bs4 import BeautifulSoup
from docx import Document

# Tags that are turned into their own entry in the Word document.
_HEADING_TAGS = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
_BLOCK_TAGS = ['p'] + _HEADING_TAGS + ['div']


def html_to_word(html_content, word_path):
    """Write the block-level text of an HTML string to a .docx file.

    Every ``p``, ``h1``-``h6`` and ``div`` element becomes one entry in
    the document, in document order. Headings are written as Word
    headings of the matching level; everything else is a plain paragraph.

    Args:
        html_content: HTML markup to convert.
        word_path: Destination path (or file-like object) for the .docx.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    doc = Document()

    for element in soup.find_all(_BLOCK_TAGS):
        if element.find(_BLOCK_TAGS) is not None:
            # Container: its nested blocks are visited by this loop on
            # their own, so taking get_text() here would write their text
            # twice. Keep only the text that sits directly in the container.
            loose = element.find_all(string=True, recursive=False)
            text = ''.join(piece.strip() for piece in loose)
        else:
            text = element.get_text(strip=True)

        if not text:
            continue

        if element.name in _HEADING_TAGS:
            # 'h3' -> heading level 3
            doc.add_heading(text, level=int(element.name[1]))
        else:
            doc.add_paragraph(text)

    doc.save(word_path)
    print(f"HTML content saved to {word_path}")

Heading 1

+

This is a paragraph.

+

Heading 2

+
Content inside a div.
+ + +""" + +# 调用函数 +html_to_word(html_content, "output.docx") \ No newline at end of file diff --git a/dsRag/Test/__init__.py b/dsRag/Test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/dsRag/Test/output.docx b/dsRag/Test/output.docx new file mode 100644 index 00000000..e389e623 Binary files /dev/null and b/dsRag/Test/output.docx differ diff --git a/dsRag/static/ai.html b/dsRag/static/ai.html index ead942c5..b724b497 100644 --- a/dsRag/static/ai.html +++ b/dsRag/static/ai.html @@ -90,7 +90,7 @@ background-color: #d32f2f; } - #downloadBtn { + #saveWordBtn { background-color: #4CAF50; color: white; border: none; @@ -99,12 +99,20 @@ border-radius: 5px; cursor: pointer; transition: background-color 0.3s; + display: flex; + align-items: center; + gap: 8px; } - #downloadBtn:hover { + #saveWordBtn:hover { background-color: #45a049; } + #saveWordBtn::before { + content: "\1F4C4"; /* Word文档图标 */ + font-size: 18px; + } + .status { text-align: center; margin-bottom: 20px; @@ -163,7 +171,38 @@ + + +