You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

220 lines
8.0 KiB

4 months ago
import json
4 months ago
import uuid
4 months ago
from datetime import date, datetime
from asyncpg.pool import Pool
4 months ago
from fastapi import FastAPI, Form, Query
4 months ago
from fastapi.staticfiles import StaticFiles
from contextlib import asynccontextmanager
from fastapi import FastAPI, Depends
import asyncpg
import uvicorn
from openai import AsyncOpenAI
4 months ago
from starlette.responses import StreamingResponse
4 months ago
4 months ago
from Config import *
4 months ago
from Model.biModel import *
4 months ago
from Text2Sql.Util.MarkdownToDocxUtil import markdown_to_docx
4 months ago
from Text2Sql.Util.SaveToExcel import save_to_excel
4 months ago
from Text2Sql.Util.VannaUtil import VannaUtil
# Create the FastAPI application and serve the local "static" directory
# under the /static URL prefix.
# NOTE(review): `app` is rebound to a new FastAPI instance further down in
# this file, so routes declared after that point attach to the later instance.
app = FastAPI()
app.mount(path="/static", app=StaticFiles(directory="static"), name="static")

# Shared VannaUtil instance; the request handlers call it to generate SQL.
vn = VannaUtil()
4 months ago
# Application lifespan: owns the asyncpg connection pool.
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Open the asyncpg pool on startup and close it on shutdown.

    The pool is stored on ``app.state.pool`` so that dependencies and
    handlers can acquire connections from it.

    Args:
        app: The FastAPI application whose state receives the pool.

    Yields:
        Nothing; control returns to the framework while the app is running.
    """
    # Reuse the module's pool factory so the connection settings live in
    # exactly one place.
    app.state.pool = await create_pool()
    try:
        yield
    finally:
        # Release the pool even when an exception is thrown into the
        # context manager at the yield point.
        await app.state.pool.close()
# Rebinding `app` discards the FastAPI instance created earlier in this file
# together with its "/static" mount. The mount is therefore registered again
# on the instance that actually receives the routes below, so the download
# URLs returned by the handlers resolve.
app = FastAPI(lifespan=lifespan)
app.mount("/static", StaticFiles(directory="static"), name="static")
# Dependency: lend one pooled connection to the request.
async def get_db():
    """Yield a connection acquired from ``app.state.pool``.

    The connection is handed back to the pool when the ``async with`` block
    exits, i.e. when the dependency is finalised.
    """
    pool = app.state.pool
    async with pool.acquire() as conn:
        yield conn
class PostgreSQLUtil:
    """Small helper around a connection pool for running queries.

    The pool only needs to provide ``acquire()`` as an async context manager
    whose connection exposes ``fetch(sql, *args)``.
    """

    def __init__(self, pool: "Pool"):
        """Store the pool that queries will acquire connections from."""
        self.pool = pool

    @staticmethod
    def _normalize_params(params):
        """Turn the ``params`` argument into a tuple of positional bind values.

        ``None`` means "no parameters"; a list or tuple supplies one value per
        placeholder; any other value is treated as a single parameter.
        """
        if params is None:
            return ()
        if isinstance(params, (list, tuple)):
            return tuple(params)
        return (params,)

    @classmethod
    def _rows_to_json(cls, rows):
        """Serialise an iterable of mapping-like rows to a JSON array string."""
        # Rows are converted to plain dicts first; record objects are not
        # JSON-serialisable on their own.
        return json.dumps([dict(row) for row in rows], default=cls.json_serializer)

    async def execute_query(self, sql, params=None):
        """Run ``sql`` and return the fetched rows.

        Args:
            sql: Query text using ``$1``, ``$2``... placeholders.
            params: ``None``, a single value, or a list/tuple with one value
                per placeholder. To bind a list as ONE array parameter, wrap
                it in a tuple: ``([1, 2, 3],)``.

        Returns:
            Whatever ``connection.fetch`` returns for the query.
        """
        bind_values = self._normalize_params(params)
        async with self.pool.acquire() as connection:
            # Bind values must be passed positionally; passing ``None`` or a
            # whole list as one argument would bind it to ``$1``.
            return await connection.fetch(sql, *bind_values)

    async def query_to_json(self, sql, params=None):
        """Run ``sql`` and return the result rows as a JSON string."""
        rows = await self.execute_query(sql, params)
        return self._rows_to_json(rows)

    @staticmethod
    def json_serializer(obj):
        """Serialise types that ``json`` cannot handle natively.

        Returns:
            The ISO-8601 string for ``date`` / ``datetime`` values.

        Raises:
            TypeError: For any other unsupported type.
        """
        if isinstance(obj, (date, datetime)):
            return obj.isoformat()
        raise TypeError(f"Type {type(obj)} not serializable")
async def create_pool():
    """Create an asyncpg connection pool from the ``PG_*`` settings.

    Returns:
        The pool returned by ``asyncpg.create_pool``, configured with
        ``min_size=1`` and ``max_size=10``.
    """
    pool_options = {
        "host": PG_HOST,
        "port": PG_PORT,
        "database": PG_DATABASE,
        "user": PG_USER,
        "password": PG_PASSWORD,
        "min_size": 1,
        "max_size": 10,
    }
    pool = await asyncpg.create_pool(**pool_options)
    return pool
4 months ago
4 months ago
@app.post("/questions/get_excel")
async def get_excel(
    question_id: str = Form(...),
    question_str: str = Form(...),
    db: asyncpg.Connection = Depends(get_db),
):
    """Generate SQL for a question, run it and export the rows to Excel.

    Args:
        question_id: Client-supplied GUID identifying the question.
        question_str: Natural-language question text.
        db: Connection provided by the ``get_db`` dependency.

    Returns:
        A dict with ``success`` and ``message``; on success it also carries
        ``download_url`` pointing at the generated file under ``/static``.
    """
    # Only canonical 36-character GUIDs are accepted. A length check alone
    # would let arbitrary 36-character strings through, so the value is
    # parsed and compared against its canonical form.
    try:
        is_guid = (
            len(question_id) == 36
            and str(uuid.UUID(question_id)) == question_id.lower()
        )
    except ValueError:
        is_guid = False
    if not is_guid:
        return {"success": False, "message": "question_id格式错误"}

    # Extra instructions appended to every question before SQL generation.
    common_prompt = (
        "\n"
        "返回的信息要求\n"
        "1行政区划为NULL 或者是空字符的不参加统计\n"
        "2目标数据库是Postgresql 16\n"
    )
    question = question_str + common_prompt

    # Delete before inserting so a repeated request does not duplicate the row.
    await delete_question(db, question_id)
    await insert_question(db, question_id, question)

    # Generate the full SQL statement for the question.
    sql = vn.generate_sql(question)
    print("生成的查询 SQL:\n", sql)

    # Record the generated SQL and the new state on the question row.
    await update_question_by_id(db, question_id=question_id, sql=sql, state_id=1)

    # NOTE(review): the statement below is generated from user input and is
    # executed as-is; confirm the database role used here is restricted
    # (ideally read-only).
    _data = await db.fetch(sql)

    # Write the rows to a uniquely named workbook in the static directory.
    uuid_str = str(uuid.uuid4())
    filename = f"static/{uuid_str}.xlsx"
    save_to_excel(_data, filename)

    # Remember the Excel file name on the question row.
    await update_question_by_id(db, question_id, excel_file_name=filename)

    # The download URL is the static-relative path of the file just written.
    return {
        "success": True,
        "message": "Excel文件生成成功",
        "download_url": f"/{filename}",
    }
4 months ago
# http://10.10.21.20:8000/questions/get_docx_stream?question_id_get=af15d834-e7f5-46b4-a0f6-15f1f888f443
4 months ago
# Shared asynchronous OpenAI client, configured from MODEL_API_URL and
# MODEL_API_KEY.
client = AsyncOpenAI(base_url=MODEL_API_URL, api_key=MODEL_API_KEY)
4 months ago
@app.api_route("/questions/get_docx_stream", methods=["POST", "GET"])
async def get_docx_stream(
    question_id: str = Form(None, description="问题IDPOST请求"),  # POST form field
    question_id_get: str = Query(None, description="问题IDGET请求"),  # GET query parameter
    db: asyncpg.Connection = Depends(get_db),
):
    """Stream a model-written Markdown report for a stored question.

    The SQL saved for the question is executed, the rows are sent to the
    chat model, and the generated text is streamed to the client. After the
    stream completes, the text is converted to a .docx file under ``static/``
    and its name is stored on the question row.

    Args:
        question_id: Question id when supplied as a form field (POST).
        question_id_get: Question id when supplied as a query parameter (GET).
        db: Connection provided by the ``get_db`` dependency.

    Returns:
        A ``StreamingResponse`` with the generated text, or a dict with
        ``success=False`` when the id is missing or no SQL is stored for it.
    """
    # The form field takes precedence; fall back to the query parameter.
    if question_id is None:
        question_id = question_id_get
    if question_id is None:
        return {"success": False, "message": "缺少问题ID参数"}

    # Look up the SQL stored for this question. fetchrow returns None for an
    # unknown id instead of raising IndexError on an empty result list.
    row = await db.fetchrow("SELECT * FROM t_bi_question WHERE id = $1", question_id)
    if row is None or not row["sql"]:
        return {"success": False, "message": "问题不存在或尚未生成SQL"}
    sql = row["sql"]

    # Instructions for the report; the data is appended below.
    prompt = (
        "\n"
        "请根据以下 JSON 数据整理出2000字左右的话描述当前数据情况要求\n"
        "1以Markdown格式返回我将直接通过markdown格式生成Word\n"
        "2标题统一为长春云校数据分析报告\n"
        "3内容中不要提到JSON数据统一称数据\n"
        "4尽量以条目列出这样更清晰\n"
        "5数据\n"
    )
    _data = await db.fetch(sql)
    # Convert the records to JSON exactly once. default=str is deliberate:
    # the text is only read by the model, and it keeps date/Decimal values
    # from aborting the request.
    json_data = json.dumps(
        [dict(record) for record in _data], ensure_ascii=False, default=str
    )
    print(json_data)
    prompt = prompt + json_data

    # Ask the model for the summary with streaming enabled.
    response = await client.chat.completions.create(
        model=QWEN_MODEL_NAME,
        messages=[
            {"role": "system", "content": "你是一个数据分析助手,擅长从 JSON 数据中提取关键信息并生成详细的总结。"},
            {"role": "user", "content": prompt},
        ],
        max_tokens=3000,  # upper bound on the generated length
        temperature=0.7,  # sampling temperature
        stream=True,
    )

    # Name of the Word document that will hold the finished report.
    uuid_str = str(uuid.uuid4())
    filename = f"static/{uuid_str}.docx"

    async def generate_stream():
        """Relay model output to the client, then persist it as a .docx file."""
        pieces = []
        try:
            async for chunk in response:
                # Some chunks carry no choices or no text; skip them.
                if not chunk.choices:
                    continue
                text = chunk.choices[0].delta.content
                if not text:
                    continue
                print(text, end="", flush=True)  # echo to the console
                pieces.append(text)
                yield text.encode("utf-8")

            # Streaming finished: build the Word document from the full text.
            markdown_to_docx("".join(pieces), output_file=filename)

            # Record the document name on the question row.
            update_sql = "UPDATE t_bi_question SET docx_file_name = $1 WHERE id = $2"
            try:
                await db.execute(update_sql, filename, question_id)
            except asyncpg.InterfaceError:
                # NOTE(review): depending on the FastAPI version, the
                # dependency's connection may already have been returned to
                # the pool by the time the body is streamed; fall back to a
                # fresh connection in that case.
                async with app.state.pool.acquire() as conn:
                    await conn.execute(update_sql, filename, question_id)
        except Exception as e:
            # Boundary of the stream: report the failure to the client as a
            # JSON payload appended to whatever was already sent.
            error_response = json.dumps({
                "success": False,
                "message": f"生成Word文件失败: {str(e)}"
            })
            print(error_response)
            yield error_response.encode("utf-8")
        finally:
            # Always release the upstream stream.
            # NOTE(review): the OpenAI async stream is believed to expose
            # ``close``; ``aclose`` is kept as a fallback — confirm against
            # the installed client version.
            close_stream = getattr(response, "close", None) or getattr(response, "aclose", None)
            if close_stream is not None:
                await close_stream()

    # Return the generator as a streaming response.
    return StreamingResponse(
        generate_stream(),
        media_type="text/plain; charset=utf-8",
        headers={
            "Cache-Control": "no-cache",  # disable caching
            "Content-Type": "text/event-stream; charset=utf-8",
            "Transfer-Encoding": "chunked",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",  # disable Nginx buffering, if Nginx is in front
        },
    )
4 months ago
4 months ago
# Script entry point: serve "app:app" with uvicorn on all interfaces,
# port 8000, with auto-reload enabled.
if __name__ == "__main__":
    server_options = {"host": "0.0.0.0", "port": 8000, "reload": True}
    uvicorn.run("app:app", **server_options)