'commit'

4 weeks ago · 6a888b8cb8
parent e6c0c618a9
commit 6a888b8cb8
5 changed files with 125 additions and 2 deletions
--- a/dsRag/Start.py
+++ b/dsRag/Start.py
@@ -15,6 +15,10 @@ from Config.Config import MS_MODEL_PATH, MS_MODEL_LIMIT, MS_HOST, MS_PORT, MS_MA
 from Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
 from Milvus.Utils.MilvusConnectionPool import *
 from Milvus.Utils.MilvusConnectionPool import MilvusConnectionPool
+from docx import Document
+from docx.shared import Inches
+from io import BytesIO
+import html2text

 # 初始化日志
 logger = logging.getLogger(__name__)
@@ -156,6 +160,42 @@ http://10.10.21.22:8000/static/ai.html
 class QueryRequest(BaseModel):
    query: str = Field(..., description="用户查询的问题")

+# Request body for POST /api/save-word: carries the HTML to export to Word.
+class SaveWordRequest(BaseModel):
+    html: str = Field(..., description="要保存为Word的HTML内容")
+
+@app.post("/api/save-word")
+async def save_to_word(request: Request):
+    """Convert posted HTML into a .docx file and return it as a download.
+
+    The JSON body is validated against ``SaveWordRequest`` (``{"html": ...}``).
+    The HTML is flattened to text with html2text (links ignored) and written
+    as a single paragraph under a fixed title heading.
+
+    Raises:
+        HTTPException: 422 when the body fails ``SaveWordRequest`` validation,
+            400 when the body cannot be parsed at all.
+    """
+    # Local import: only this handler needs percent-encoding.
+    from urllib.parse import quote
+
+    try:
+        data = await request.json()
+        save_request = SaveWordRequest(**data)
+    except ValidationError as e:
+        logger.error(f"请求体验证失败: {e.errors()}")
+        raise HTTPException(status_code=422, detail=e.errors()) from e
+    except Exception as e:
+        logger.error(f"请求解析失败: {str(e)}")
+        raise HTTPException(status_code=400, detail="无效的请求格式") from e
+
+    # Flatten the HTML to text
+    h = html2text.HTML2Text()
+    h.ignore_links = True
+    plain_text = h.handle(save_request.html)
+
+    # Build the Word document
+    doc = Document()
+    doc.add_heading('小学数学问答', 0)
+    doc.add_paragraph(plain_text)
+
+    # Serialize into an in-memory byte stream and rewind it for reading
+    file_stream = BytesIO()
+    doc.save(file_stream)
+    file_stream.seek(0)
+
+    # HTTP header values must be latin-1 encodable; a raw non-ASCII filename
+    # makes response construction fail. Send an ASCII fallback plus the real
+    # name percent-encoded via the RFC 6266 / RFC 5987 ``filename*`` form.
+    download_name = "小学数学问答.docx"
+    content_disposition = (
+        'attachment; filename="export.docx"; '
+        f"filename*=UTF-8''{quote(download_name)}"
+    )
+
+    return StreamingResponse(
+        file_stream,
+        media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+        headers={"Content-Disposition": content_disposition}
+    )
+
@app.post("/api/rag")
 async def rag_stream(request: Request):
    try:
--- a/dsRag/Test/TestWriteDoc.py
+++ b/dsRag/Test/TestWriteDoc.py
@@ -0,0 +1,44 @@
+from bs4 import BeautifulSoup
+from docx import Document
+"""
+pip install python-docx html2text beautifulsoup4
+"""
+
+def html_to_word(html_content, word_path):
+    """Write the text of an HTML string's block elements to a .docx file.
+
+    Every ``<p>``, ``<h1>``-``<h6>`` and ``<div>`` that owns non-empty text
+    becomes one paragraph. A string is attributed only to its nearest
+    enclosing block element, so nested markup such as ``<div><p>x</p></div>``
+    is written once instead of once per ancestor.
+
+    Args:
+        html_content: HTML markup to convert.
+        word_path: Destination passed to ``Document.save``.
+    """
+    block_tags = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'div']
+
+    # Parse the HTML
+    soup = BeautifulSoup(html_content, 'html.parser')
+
+    # Create the Word document
+    doc = Document()
+
+    # Walk every block element in document order
+    for element in soup.find_all(block_tags):
+        # Keep only strings whose closest block ancestor is this element;
+        # strings inside a nested block are emitted when that block is visited.
+        text = ''.join(
+            s.strip()
+            for s in element.strings
+            if s.find_parent(block_tags) is element
+        )
+        if text:
+            # Append to the Word document
+            doc.add_paragraph(text)
+
+    # Save the Word document
+    doc.save(word_path)
+    print(f"HTML content saved to {word_path}")
+
+
+# Sample HTML content
+html_content = """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Sample HTML</title>
+</head>
+<body>
+    <h1>Heading 1</h1>
+    <p>This is a paragraph.</p>
+    <h2>Heading 2</h2>
+    <div>Content inside a div.</div>
+</body>
+</html>
+"""
+
+# Run the conversion only when executed as a script, so importing this
+# module does not write output.docx as a side effect.
+if __name__ == "__main__":
+    html_to_word(html_content, "output.docx")
--- a/dsRag/Test/init.py
+++ b/dsRag/Test/init.py
--- a/dsRag/Test/output.docx
+++ b/dsRag/Test/output.docx
--- a/dsRag/static/ai.html
+++ b/dsRag/static/ai.html
@@ -90,7 +90,7 @@
            background-color: #d32f2f;
        }

-        #downloadBtn {
+        #saveWordBtn {
            background-color: #4CAF50;
            color: white;
            border: none;
@@ -99,12 +99,20 @@
            border-radius: 5px;
            cursor: pointer;
            transition: background-color 0.3s;
+            display: flex;
+            align-items: center;
+            gap: 8px;
        }

-        #downloadBtn:hover {
+        #saveWordBtn:hover {
            background-color: #45a049;
        }

+        #saveWordBtn::before {
+            content: "\1F4C4"; /* U+1F4C4 page glyph, used as the Word-document icon */
+            font-size: 18px;
+        }
+
        .status {
            text-align: center;
            margin-bottom: 20px;
@@ -163,7 +171,38 @@
        <input type="text" id="questionInput" placeholder="请输入您的问题，例如：小学数学的学习方法">
        <button id="submitBtn" onclick="submitQuestion()">提问</button>
        <button id="clearBtn" onclick="clearAll()">清空</button>
+        <button id="saveWordBtn" onclick="saveToWord()">
+            <span class="text">保存为Word</span>
+        </button>
    </div>
+
+    <script>
+        // Send the rendered answer HTML to /api/save-word and download the
+        // returned .docx. Alerts instead of downloading when there is no
+        // content yet or when the request fails.
+        function saveToWord() {
+            const htmlContent = document.getElementById('dataArea').innerHTML;
+            if (!htmlContent || htmlContent === '等待问题...') {
+                alert('请先生成内容再保存！');
+                return;
+            }
+
+            fetch('/api/save-word', {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json'
+                },
+                body: JSON.stringify({html: htmlContent})
+            })
+            .then(response => {
+                // A non-2xx reply carries an error payload, not a document;
+                // saving it would produce a corrupt .docx.
+                if (!response.ok) {
+                    throw new Error(`HTTP ${response.status}`);
+                }
+                return response.blob();
+            })
+            .then(blob => {
+                const url = window.URL.createObjectURL(blob);
+                const a = document.createElement('a');
+                a.href = url;
+                a.download = '小学数学问答.docx';
+                document.body.appendChild(a);
+                a.click();
+                document.body.removeChild(a);
+                // Release the blob URL once the click has been dispatched,
+                // otherwise the blob stays alive until the page unloads.
+                setTimeout(() => window.URL.revokeObjectURL(url), 0);
+            })
+            .catch(error => {
+                console.error('save-word failed:', error);
+                alert('保存失败，请稍后重试！');
+            });
+        }
+    </script>
 </div>

 <script>