"""FastAPI service entry point (dsRag/StartEs.py).

Mounts the ``Static`` directory under ``/static`` and exposes two endpoints:

* ``POST /api/save-word`` - converts posted HTML to a .docx file by invoking
  the ``pandoc`` executable and returns it as a download.
* ``POST /api/rag`` - placeholder, not implemented yet.
"""
import logging
import os
import subprocess
import tempfile
import urllib.parse
import uuid
from contextlib import asynccontextmanager
from io import BytesIO
from logging.handlers import RotatingFileHandler
from typing import List

import jieba  # jieba word-segmentation library
import uvicorn
from fastapi import FastAPI, Request, HTTPException
from fastapi.staticfiles import StaticFiles
from gensim.models import KeyedVectors
from pydantic import BaseModel, Field, ValidationError
from starlette.responses import StreamingResponse

from Config.Config import MS_MODEL_PATH, MS_MODEL_LIMIT, MS_HOST, MS_PORT, MS_MAX_CONNECTIONS, MS_NPROBE, \
    MS_COLLECTION_NAME
from Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager
# NOTE(review): `logging` was previously only in scope through this wildcard
# import (presumably re-exported by that module); it is now imported
# explicitly above so the logger setup does not depend on it.
from Milvus.Utils.MilvusConnectionPool import *
from Milvus.Utils.MilvusConnectionPool import MilvusConnectionPool
from Util.ALiYunUtil import ALiYunUtil

# Initialise logging.
# RotatingFileHandler does not create missing parent directories, so make
# sure the log directory exists before opening the log file.
os.makedirs('Logs', exist_ok=True)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = RotatingFileHandler('Logs/start.log', maxBytes=1024 * 1024, backupCount=5)
handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logger.addHandler(handler)


def html_to_word_pandoc(html_file, output_file, *, check=False):
    """Convert an HTML file to a Word document by running ``pandoc``.

    Args:
        html_file: Path of the HTML file to convert.
        output_file: Path of the Word file pandoc should write.
        check: When True, a non-zero pandoc exit status raises
            ``subprocess.CalledProcessError``. Defaults to False, which keeps
            the historical behaviour of ignoring the exit status.

    Raises:
        FileNotFoundError: If the ``pandoc`` executable cannot be found.
        subprocess.CalledProcessError: If ``check`` is True and pandoc fails.
    """
    subprocess.run(['pandoc', html_file, '-o', output_file], check=check)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: set up shared state before serving."""
    # Initialise the Aliyun LLM helper and share it through the app state.
    app.state.aliyun_util = ALiYunUtil()
    yield


app = FastAPI(lifespan=lifespan)

# Mount the static files directory.
app.mount("/static", StaticFiles(directory="Static"), name="static")


class QueryRequest(BaseModel):
    """Request schema carrying a user query and uploaded documents."""
    query: str = Field(..., description="用户查询的问题")
    documents: List[str] = Field(..., description="用户上传的文档")


class SaveWordRequest(BaseModel):
    """Request schema carrying HTML to be saved as a Word document.

    NOTE(review): ``save_to_word`` does not use this model; it reads the
    ``html_content`` key from the raw JSON body, whereas this model names the
    field ``html``. Confirm which key clients actually send.
    """
    html: str = Field(..., description="要保存为Word的HTML内容")


@app.post("/api/save-word")
async def save_to_word(request: Request):
    """Convert the posted HTML to a .docx file and return it as a download.

    The JSON body must contain a non-empty string under ``html_content``.

    Returns:
        A ``StreamingResponse`` with the generated Word document, served as
        an attachment named ``小学数学问答.docx``.

    Raises:
        HTTPException: 400 when the body cannot be parsed or ``html_content``
            is missing, empty, or not a string; 500 when writing, converting
            or reading the document fails.
    """
    temp_html = None
    output_file = None
    try:
        # Parse request data.
        try:
            data = await request.json()
            html_content = data.get('html_content', '')
            if not html_content:
                raise ValueError("Empty HTML content")
            if not isinstance(html_content, str):
                # Reject here; otherwise f.write() below fails with a 500.
                raise ValueError("HTML content must be a string")
        except Exception as e:
            logger.error(f"Request parsing failed: {str(e)}")
            raise HTTPException(status_code=400, detail=f"Invalid request: {str(e)}") from e

        # Use one unique id for both temp files. A fixed output path would be
        # shared by concurrent requests, which could then overwrite or delete
        # each other's document.
        job_id = uuid.uuid4().hex
        temp_dir = tempfile.gettempdir()

        # Create the temporary HTML file.
        temp_html = os.path.join(temp_dir, job_id + ".html")
        with open(temp_html, "w", encoding="utf-8") as f:
            f.write(html_content)

        # Convert with pandoc; a non-zero exit status raises.
        output_file = os.path.join(temp_dir, job_id + ".docx")
        html_to_word_pandoc(temp_html, output_file, check=True)

        # Read the generated Word file fully into memory, because the file on
        # disk is removed in the `finally` block before the response is sent.
        with open(output_file, "rb") as f:
            stream = BytesIO(f.read())

        # Build the response; the non-ASCII download name is percent-encoded
        # for the `filename*` form of Content-Disposition.
        encoded_filename = urllib.parse.quote("小学数学问答.docx")
        return StreamingResponse(
            stream,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"})

    except HTTPException:
        raise
    except Exception as e:
        # logger.exception keeps the traceback in the log file.
        logger.exception(f"Unexpected error: {str(e)}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
    finally:
        # Clean up temporary files. Each path is handled on its own so that a
        # failure to remove one does not leave the other behind.
        for path in (temp_html, output_file):
            if not path:
                continue
            try:
                os.remove(path)
            except FileNotFoundError:
                # Never created (e.g. pandoc failed before writing output).
                pass
            except OSError as e:
                logger.warning(f"Failed to clean up temp files: {str(e)}")


@app.post("/api/rag")
async def rag_stream(request: Request):
    """Placeholder for the RAG endpoint; currently does nothing and returns None."""
    pass
    # TODO: implement


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)