|
|
|
|
import os
|
|
|
|
|
import platform
|
|
|
|
|
from Text2Sql.Util.Text2SqlUtil import *
|
|
|
|
|
from Text2Sql.Util.PostgreSQLUtil import PostgreSQLUtil
|
|
|
|
|
from Text2Sql.Util.SaveToExcel import save_to_excel
|
|
|
|
|
from pyecharts.charts import Bar
|
|
|
|
|
from pyecharts import options as opts
|
|
|
|
|
from pyecharts.commons.utils import JsCode
|
|
|
|
|
import pandas as pd
|
|
|
|
|
import webbrowser
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def process_data(raw_data, top_n: int = 10) -> pd.DataFrame:
    """Build the ranking table of the rows with the highest upload counts.

    Steps:
        1. Drop rows whose administrative district ('行政区划名称') is
           missing or an empty string.
        2. Sort the remaining rows by upload count ('上传课程数量'),
           largest first.
        3. Keep the first ``top_n`` rows.

    Args:
        raw_data: Anything ``pandas.DataFrame`` accepts, e.g. a list of
            row dicts. Non-empty input must provide the columns
            '行政区划名称' and '上传课程数量'.
        top_n: Number of rows to keep. Defaults to 10.

    Returns:
        A DataFrame with at most ``top_n`` rows, sorted by upload count in
        descending order. An empty DataFrame is returned when ``raw_data``
        contains no rows.
    """
    df = pd.DataFrame(raw_data)
    if df.empty:
        # A frame built from an empty result has no columns, so the column
        # lookups below would raise KeyError; there is nothing to rank.
        return df

    district = df['行政区划名称']
    has_district = district.notna() & (district != '')
    ranked = df[has_district].sort_values('上传课程数量', ascending=False)
    return ranked.head(top_n)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _gradient_colors(count):
    """Return ``count`` 'rgb(r, g, b)' strings forming a dark-to-light gradient."""

    def clamp(channel):
        # Keep every channel a valid 0-255 value when count exceeds 10.
        return max(0, min(255, channel))

    return [
        f"rgb({clamp(75 + i * 15)}, {clamp(115 - i * 10)}, {clamp(220 - i * 20)})"
        for i in range(count)
    ]


def _js_html_string_array(values):
    """Return a JavaScript array literal of HTML-safe, single-quoted strings."""
    from html import escape  # stdlib; imported locally so the block is self-contained

    def literal(value):
        # HTML-escaping replaces both quote characters with entities, so the
        # text cannot terminate the single-quoted JS literal; the backslash
        # is emitted as an entity too so no JS escape sequence can form.
        safe = escape(str(value)).replace("\\", "&#92;")
        return f"'{safe}'"

    return "[" + ", ".join(literal(v) for v in values) + "]"


def create_top10_chart(data):
    """Build a horizontal bar chart ranking schools by uploaded course count.

    Args:
        data: Table offering the columns '学校名称', '上传课程数量' and
            '行政区划名称', each read through ``data[column].tolist()``
            (for example a pandas DataFrame).

    Returns:
        The configured pyecharts ``Bar`` object; it is not rendered here.
    """
    # Extract the plotted columns.
    schools = data['学校名称'].tolist()
    counts = data['上传课程数量'].tolist()
    districts = data['行政区划名称'].tolist()

    # One gradient colour per bar (dark to light), as a JS array literal.
    colors = _gradient_colors(len(schools))
    color_array = "[" + ", ".join(f"'{c}'" for c in colors) + "]"

    # District names are shown inside the HTML tooltip, so they are embedded
    # as HTML-escaped JS strings rather than via Python's repr().
    district_array = _js_html_string_array(districts)

    # Create the chart and its single series.
    bar = Bar(init_opts=opts.InitOpts(width='1200px', height='600px'))
    bar.add_xaxis(schools)
    bar.add_yaxis(
        series_name="上传数量",
        y_axis=counts,
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(
            "function(params){"
            f"  return {color_array}[params.dataIndex];"
            "}"
        )),
        label_opts=opts.LabelOpts(
            position="right",
            formatter=JsCode(
                "function(params){"
                "  return params.value + ' (' + params.name.split('').join('\\n') + ')';"
                "}"
            ),
        ),
    )

    # Global configuration.
    bar.set_global_opts(
        title_opts=opts.TitleOpts(
            title="上传资源数量TOP10学校排名",
            subtitle="数据来源:课程资源管理系统",
        ),
        tooltip_opts=opts.TooltipOpts(
            formatter=JsCode(
                "function(params){"
                "  return params.name + '<br/>'"
                f"    + '所属行政区:' + {district_array}[params.dataIndex] + '<br/>'"
                "    + '上传数量:<b style=\"color:#5470C6\">' + params.value + '</b>'"
                "}"
            )
        ),
        # reversal_axis() below only swaps the axis *data*; axis options stay
        # bound to the physical axis. After the swap the counts run along the
        # x axis and the school names along the y axis, so the count name
        # goes into xaxis_opts and the school-label options into yaxis_opts.
        xaxis_opts=opts.AxisOpts(name="上传数量(件)"),
        yaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(
                rotate=30,
                # Truncate long school names to keep the axis readable.
                formatter=JsCode(
                    "function(value){"
                    "  return value.length > 6 ? value.substring(0,6)+'...' : value;"
                    "}"
                ),
            )
        ),
        datazoom_opts=[opts.DataZoomOpts(type_="inside")],
        visualmap_opts=opts.VisualMapOpts(
            # default=0 keeps an empty table from raising ValueError.
            min_=min(counts, default=0),
            max_=max(counts, default=0),
            orient="horizontal",
            pos_left="center",
            range_color=["#91CC75", "#5470C6"],
        ),
    )

    # Swap the axes so the bars are drawn horizontally.
    bar.reversal_axis()

    return bar
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script flow: train the text-to-SQL model on the table DDL, turn a
    # natural-language question into SQL, run it and export the rows to Excel.
    vn = DeepSeekVanna()

    # Train on the table definitions.
    print("开始训练...")
    with open("Sql/CreateTable.sql", "r", encoding="utf-8") as file:
        ddl = file.read()
    vn.train(
        ddl=ddl
    )

    # Natural-language question, followed by the shared constraints that are
    # appended to it.
    question = '''
    查询发布时间是2024年度,每个行政区划每个学校都上传了多少课程数量,
    返回: 行政区划名,学段,排名,学校名称,课程数量
    '''
    common_prompt = '''
    要求:
    1、行政区划为NULL 或者是空字符的不参加统计工作,
    '''
    question = question + common_prompt

    # Generate the full SQL statement for the question.
    print("开始查询...")
    sql = vn.generate_sql(question)
    print("生成的查询 SQL:\n", sql)

    # Run the query and export the result.
    with PostgreSQLUtil() as db:
        sample_data = db.execute_query(sql)
        filename = "d:/导出信息.xlsx"
        save_to_excel(sample_data, filename)
        # os.startfile exists only on Windows; elsewhere just report where
        # the export was written instead of failing with AttributeError.
        if platform.system() == "Windows":
            os.startfile(filename)
        else:
            print("结果已导出到:", filename)
|