# dsProject/dsRag/Start.py

import json
import subprocess
import tempfile
import urllib.parse
import uuid
import warnings
from contextlib import asynccontextmanager
from io import BytesIO

import fastapi
import uvicorn
from fastapi import FastAPI, HTTPException
from openai import AsyncOpenAI
from sse_starlette import EventSourceResponse
from starlette.responses import StreamingResponse
from starlette.staticfiles import StaticFiles

from Config import Config
from Util.EsSearchUtil import *

# Module-level logger for this service; records INFO and above.
# NOTE(review): `logging`, `os` and `RotatingFileHandler` are not imported
# explicitly in this file; presumably they arrive via the star import from
# Util.EsSearchUtil -- confirm.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# The log file lives in a "Logs" directory next to this file; create the
# directory if it does not exist yet.
log_file = os.path.join(os.path.dirname(__file__), 'Logs', 'app.log')
os.makedirs(os.path.dirname(log_file), exist_ok=True)

# File handler: rotates at 1 MiB per file and keeps up to 5 backups.
file_handler = RotatingFileHandler(
    log_file, maxBytes=1024 * 1024, backupCount=5, encoding='utf-8')
file_handler.setFormatter(logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

# Console handler, using the same record format as the file handler.
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))

logger.addHandler(file_handler)
logger.addHandler(console_handler)

# Async OpenAI client pointed at the DashScope OpenAI-compatible endpoint;
# the API key is read from Config.
client = AsyncOpenAI(
    api_key=Config.MODEL_API_KEY,
    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
)

@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook passed to ``FastAPI(lifespan=...)``.

    Startup phase (before ``yield``): installs warning filters that silence
    the insecure-TLS / unverified-HTTPS warnings. There is no shutdown work
    after the ``yield``.

    The function is wrapped with ``asynccontextmanager`` because the
    ``lifespan`` argument expects an async context manager factory; passing
    a bare async generator function relies on a deprecated fallback.

    Args:
        app: The FastAPI application being started (unused here).
    """
    # Suppress HTTPS-related warnings. `message` is a regex matched against
    # the start of the warning text.
    warnings.filterwarnings('ignore', message='Connecting to .* using TLS with verify_certs=False is insecure')
    warnings.filterwarnings('ignore', message='Unverified HTTPS request is being made to host')
    yield


# Application instance; `lifespan` is registered as its lifespan handler.
app = FastAPI(lifespan=lifespan)

# Mount the "Static" directory so its files are served under /static.
app.mount("/static", StaticFiles(directory="Static"), name="static")


@app.post("/api/save-word")
async def save_to_word(request: fastapi.Request):
    """Convert posted Markdown to a Word document with pandoc and return it.

    The request body must be a JSON object with a non-empty string field
    ``markdown_content``. The Markdown is written to a temporary file,
    converted to ``.docx`` by the ``pandoc`` executable, read back into
    memory and returned as an attachment. Temporary files are removed
    before the handler returns.

    Args:
        request: Incoming request whose JSON body carries ``markdown_content``.

    Returns:
        StreamingResponse with the generated ``.docx`` bytes and a
        ``Content-Disposition`` header naming the download.

    Raises:
        HTTPException: 400 if the body cannot be parsed or
            ``markdown_content`` is missing, empty or not a string;
            500 if conversion or file handling fails.
    """
    # Both paths start as None so the cleanup in `finally` is well-defined
    # even when the request is rejected before any file is created.
    temp_md = None
    output_file = None
    try:
        # Parse and validate the request body.
        try:
            data = await request.json()
            markdown_content = data.get('markdown_content', '')
            if not markdown_content:
                raise ValueError("Empty MarkDown content")
            if not isinstance(markdown_content, str):
                raise ValueError("markdown_content must be a string")
        except Exception as e:
            logger.error(f"Request parsing failed: {str(e)}")
            raise HTTPException(status_code=400, detail=f"Invalid request: {str(e)}")

        # Use one unique id for both temp files so concurrent requests never
        # share (or delete) each other's input or output.
        work_id = uuid.uuid4().hex
        temp_dir = tempfile.gettempdir()

        # Write the Markdown to a temporary file for pandoc to read.
        temp_md = os.path.join(temp_dir, work_id + ".md")
        with open(temp_md, "w", encoding="utf-8") as f:
            f.write(markdown_content)

        # Convert with pandoc (argument list, no shell).
        # NOTE(review): subprocess.run blocks the event loop while pandoc
        # runs; consider offloading it to a worker thread.
        # NOTE(review): the resource path is "static" while the app mounts
        # the directory "Static" -- these differ on case-sensitive
        # filesystems; confirm which is intended.
        output_file = os.path.join(temp_dir, work_id + ".docx")
        subprocess.run(['pandoc', temp_md, '-o', output_file, '--resource-path=static'], check=True)

        # Load the generated document into memory so the temp file can be
        # deleted before the response is streamed.
        with open(output_file, "rb") as f:
            stream = BytesIO(f.read())

        # The user-facing download name is carried only in the header
        # (RFC 5987 encoding for the non-ASCII characters).
        encoded_filename = urllib.parse.quote("【理想大模型】问答.docx")
        return StreamingResponse(
            stream,
            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"})

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Unexpected error: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error") from e
    finally:
        # Best-effort cleanup; each file is handled independently so a
        # failure removing one does not leave the other behind.
        for path in (temp_md, output_file):
            if not path:
                continue
            try:
                if os.path.exists(path):
                    os.remove(path)
            except OSError as e:
                logger.warning(f"Failed to clean up temp files: {str(e)}")


@app.post("/api/rag", response_model=None)
async def rag(request: fastapi.Request):
    """Answer a query from search results, streaming the model reply as SSE.

    The request body must be a JSON object; ``query`` (default ``''``) and
    ``tags`` (default ``[]``) are read from it and passed to
    ``EsSearchUtil.queryByEs``. The ``full_content`` of every entry in the
    search result's ``text_results`` is joined into the prompt context, and
    the chat model's streamed reply is forwarded chunk by chunk.

    Args:
        request: Incoming request whose JSON body carries ``query`` and ``tags``.

    Returns:
        EventSourceResponse yielding ``data: {"reply": ...}`` items for each
        non-empty content delta, or a single ``data: {"error": ...}`` item
        if the model call fails.

    Raises:
        HTTPException: 400 if the body is not a parseable JSON object.
    """
    # Reject malformed bodies with 400, matching /api/save-word, instead of
    # letting them surface as an unhandled 500.
    try:
        data = await request.json()
        query = data.get('query', '')
        query_tags = data.get('tags', [])
    except Exception as e:
        logger.error(f"Request parsing failed: {str(e)}")
        raise HTTPException(status_code=400, detail=f"Invalid request: {str(e)}")

    # Run the search (the original comment describes it as a hybrid ES search).
    search_results = EsSearchUtil.queryByEs(query, query_tags, logger)
    # Build the context section of the prompt, one numbered line per result.
    context = "\n".join([
        f"结果{i + 1}: {res['tags']['full_content']}"
        for i, res in enumerate(search_results['text_results'])
    ])
    # Assemble the prompt (includes the instruction about keeping images in
    # their original order). The text is runtime data and is kept verbatim.
    prompt = f"""
            信息检索与回答助手
            根据以下关于'{query}'的相关信息：

            基本信息
            - 语言: 中文
            - 描述: 根据提供的材料检索信息并回答问题
            - 特点: 快速准确提取关键信息，清晰简洁地回答

            相关信息
            {context}

            回答要求
            1. 请仔细甄别原问题与提供材料的关联性，不相关的材料必须忽略，绝对不要包含无关信息！
            2. 如果发现相关信息与原来的问题契合度低，请直接回答"未找到相关信息"
            3. 严格保持原文中图片与上下文的顺序关系，确保语义相关性
            4. 使用Markdown格式返回，包含适当的标题、列表和代码块
            5. 直接返回Markdown内容，不要包含额外解释或说明
            6. 依托给定的资料，快速准确地回答问题
            7. 如果未提供相关信息，请直接回答"未找到相关信息"
            8. 确保内容结构清晰，便于前端展示
            """

    async def generate_response_stream():
        """Yield the model's streamed reply as pre-formatted ``data:`` items."""
        # NOTE(review): each item is already formatted as "data: ...\n\n"
        # before being handed to EventSourceResponse; confirm the client
        # expects this exact framing before changing it.
        try:
            # Call the chat model in streaming mode.
            stream = await client.chat.completions.create(
                model=Config.MODEL_NAME,
                messages=[
                    {'role': 'user', 'content': prompt}
                ],
                max_tokens=8000,
                stream=True  # enable streaming
            )
            # Forward each non-empty content delta as it arrives.
            async for chunk in stream:
                # OpenAI-compatible streams can emit chunks with an empty
                # `choices` list; skip them rather than abort on IndexError.
                if not chunk.choices:
                    continue
                content = chunk.choices[0].delta.content
                if content:
                    payload = json.dumps({'reply': content}, ensure_ascii=False)
                    yield f"data: {payload}\n\n"

        except Exception as e:
            # Log the failure server-side as well as reporting it to the client.
            logger.error(f"Streaming response failed: {str(e)}")
            yield f"data: {json.dumps({'error': str(e)})}\n\n"

    return EventSourceResponse(generate_response_stream())


# When executed as a script, serve the app with uvicorn on all interfaces, port 8000.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
-												'commit'

											
										
										
											3 weeks ago
+								import json
-												'commit'

											
										
										
											3 weeks ago
+								import subprocess
 								import tempfile
-												'commit'

											
										
										
											4 weeks ago
+								import urllib.parse
-												'commit'

											
										
										
											3 weeks ago
+								import uuid
 								import warnings
 								from io import BytesIO
-												'commit'

											
										
										
											4 weeks ago
-												'commit'

											
										
										
											3 weeks ago
+								import fastapi
 								import uvicorn
 								from fastapi import FastAPI, HTTPException
-												'commit'

											
										
										
											3 weeks ago
+								from openai import AsyncOpenAI
 								from sse_starlette import EventSourceResponse
-												'commit'

											
										
										
											3 weeks ago
+								from starlette.responses import StreamingResponse
-												'commit'

											
										
										
											3 weeks ago
+								from starlette.staticfiles import StaticFiles
-												'commit'

											
										
										
											3 weeks ago
+								from Config import Config
-												'commit'

											
										
										
											3 weeks ago
+								from Util.EsSearchUtil import *
-												'commit'

											
										
										
											3 weeks ago
-												'commit'

											
										
										
											4 weeks ago
+								# 初始化日志
 								logger = logging.getLogger(__name__)
 								logger.setLevel(logging.INFO)
-												'commit'

											
										
										
											4 weeks ago
+								# 配置日志处理器
 								log_file = os.path.join(os.path.dirname(__file__), 'Logs', 'app.log')
 								os.makedirs(os.path.dirname(log_file), exist_ok=True)
 								# 文件处理器
 								file_handler = RotatingFileHandler(
-												'commit'

											
										
										
											3 weeks ago
+								    log_file, maxBytes=1024 * 1024, backupCount=5, encoding='utf-8')
-												'commit'

											
										
										
											4 weeks ago
+								file_handler.setFormatter(logging.Formatter(
 								    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
 								# 控制台处理器
 								console_handler = logging.StreamHandler()
 								console_handler.setFormatter(logging.Formatter(
 								    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
 								logger.addHandler(file_handler)
 								logger.addHandler(console_handler)
-												'commit'

											
										
										
											3 weeks ago
+								# 初始化异步 OpenAI 客户端
 								client = AsyncOpenAI(
 								    api_key=Config.MODEL_API_KEY,
 								    base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
 								)
-												'commit'

											
										
										
											4 weeks ago
 								async def lifespan(app: FastAPI):
-												'commit'

											
										
										
											4 weeks ago
+								    # 抑制HTTPS相关警告
 								    warnings.filterwarnings('ignore', message='Connecting to .* using TLS with verify_certs=False is insecure')
 								    warnings.filterwarnings('ignore', message='Unverified HTTPS request is being made to host')
-												'commit'

											
										
										
											4 weeks ago
+								    yield
-												'commit'

											
										
										
											4 weeks ago
-												'commit'

											
										
										
											4 weeks ago
+								app = FastAPI(lifespan=lifespan)
 								# 挂载静态文件目录
 								app.mount("/static", StaticFiles(directory="Static"), name="static")
 								@app.post("/api/save-word")
-												'commit'

											
										
										
											3 weeks ago
+								async def save_to_word(request: fastapi.Request):
-												'commit'

											
										
										
											4 weeks ago
+								    output_file = None
 								    try:
 								        # Parse request data
 								        try:
 								            data = await request.json()
-												'commit'

											
										
										
											3 weeks ago
+								            markdown_content = data.get('markdown_content', '')
 								            if not markdown_content:
 								                raise ValueError("Empty MarkDown content")
-												'commit'

											
										
										
											4 weeks ago
+								        except Exception as e:
 								            logger.error(f"Request parsing failed: {str(e)}")
 								            raise HTTPException(status_code=400, detail=f"Invalid request: {str(e)}")
-												'commit'

											
										
										
											3 weeks ago
+								        # 创建临时Markdown文件
 								        temp_md = os.path.join(tempfile.gettempdir(), uuid.uuid4().hex + ".md")
 								        with open(temp_md, "w", encoding="utf-8") as f:
-												'commit'

											
										
										
											3 weeks ago
+								            f.write(markdown_content)
-												'commit'

											
										
										
											4 weeks ago
 								        # 使用pandoc转换
-												'commit'

											
										
										
											4 weeks ago
+								        output_file = os.path.join(tempfile.gettempdir(), "【理想大模型】问答.docx")
-												'commit'

											
										
										
											3 weeks ago
+								        subprocess.run(['pandoc', temp_md, '-o', output_file, '--resource-path=static'], check=True)
-												'commit'

											
										
										
											4 weeks ago
 								        # 读取生成的Word文件
 								        with open(output_file, "rb") as f:
 								            stream = BytesIO(f.read())
 								        # 返回响应
-												'commit'

											
										
										
											4 weeks ago
+								        encoded_filename = urllib.parse.quote("【理想大模型】问答.docx")
-												'commit'

											
										
										
											4 weeks ago
+								        return StreamingResponse(
 								            stream,
 								            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
 								            headers={"Content-Disposition": f"attachment; filename*=UTF-8''{encoded_filename}"})
 								    except HTTPException:
 								        raise
 								    except Exception as e:
 								        logger.error(f"Unexpected error: {str(e)}")
 								        raise HTTPException(status_code=500, detail="Internal server error")
 								    finally:
 								        # 清理临时文件
 								        try:
-												'commit'

											
										
										
											3 weeks ago
+								            if temp_md and os.path.exists(temp_md):
 								                os.remove(temp_md)
-												'commit'

											
										
										
											4 weeks ago
+								            if output_file and os.path.exists(output_file):
 								                os.remove(output_file)
 								        except Exception as e:
 								            logger.warning(f"Failed to clean up temp files: {str(e)}")
-												'commit'

											
										
										
											3 weeks ago
+								@app.post("/api/rag", response_model=None)
 								async def rag(request: fastapi.Request):
-												'commit'

											
										
										
											3 weeks ago
+								    data = await request.json()
 								    query = data.get('query', '')
 								    query_tags = data.get('tags', [])
-												'commit'

											
										
										
											3 weeks ago
+								    # 调用es进行混合搜索
-												'commit'

											
										
										
											3 weeks ago
+								    search_results = EsSearchUtil.queryByEs(query, query_tags, logger)
-												'commit'

											
										
										
											3 weeks ago
+								    # 构建提示词
 								    context = "\n".join([
 								        f"结果{i + 1}: {res['tags']['full_content']}"
-												'commit'

											
										
										
											3 weeks ago
+								        for i, res in enumerate(search_results['text_results'])
-												'commit'

											
										
										
											3 weeks ago
+								    ])
 								    # 添加图片识别提示
 								    prompt = f"""
 								            信息检索与回答助手
 								            根据以下关于'{query}'的相关信息：
 								            基本信息
 								            - 语言: 中文
 								            - 描述: 根据提供的材料检索信息并回答问题
 								            - 特点: 快速准确提取关键信息，清晰简洁地回答
 								            相关信息
 								            {context}
 								            回答要求
-												'commit'

											
										
										
											3 weeks ago
+. 请仔细甄别原问题与提供材料的关联性，不相关的材料必须忽略，绝对不要包含无关信息！
 . 如果发现相关信息与原来的问题契合度低，请直接回答"未找到相关信息"
 . 严格保持原文中图片与上下文的顺序关系，确保语义相关性
 . 使用Markdown格式返回，包含适当的标题、列表和代码块
 . 直接返回Markdown内容，不要包含额外解释或说明
 . 依托给定的资料，快速准确地回答问题
 . 如果未提供相关信息，请直接回答"未找到相关信息"
-												'commit'

											
										
										
											3 weeks ago
+. 确保内容结构清晰，便于前端展示
-												'commit'

											
										
										
											3 weeks ago
+								            """
-												'commit'

											
										
										
											3 weeks ago
-												'commit'

											
										
										
											3 weeks ago
+								    async def generate_response_stream():
 								        try:
 								            # 流式调用大模型
 								            stream = await client.chat.completions.create(
 								                model=Config.MODEL_NAME,
 								                messages=[
 								                    {'role': 'user', 'content': prompt}
 								                ],
 								                max_tokens=8000,
 								                stream=True  # 启用流式模式
 								            )
-												'commit'

											
										
										
											3 weeks ago
+								            # 流式返回模型生成的回复
 								            async for chunk in stream:
 								                if chunk.choices[0].delta.content:
 								                    yield f"data: {json.dumps({'reply': chunk.choices[0].delta.content}, ensure_ascii=False)}\n\n"
 								        except Exception as e:
 								            yield f"data: {json.dumps({'error': str(e)})}\n\n"
 								    return EventSourceResponse(generate_response_stream())
-												'commit'

											
										
										
											3 weeks ago
-												'commit'

											
										
										
											4 weeks ago
 								if __name__ == "__main__":
 								    uvicorn.run(app, host="0.0.0.0", port=8000)