You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
147 lines
4.9 KiB
147 lines
4.9 KiB
import os
|
|
import platform
|
|
from Text2Sql.Util.Text2SqlUtil import *
|
|
from Text2Sql.Util.PostgreSQLUtil import PostgreSQLUtil
|
|
from Text2Sql.Util.SaveToExcel import save_to_excel
|
|
from pyecharts.charts import Bar
|
|
from pyecharts import options as opts
|
|
from pyecharts.commons.utils import JsCode
|
|
import pandas as pd
|
|
import webbrowser
|
|
|
|
|
|
def process_data(raw_data, top_n=10):
    """Reduce raw query rows to the top schools by uploaded-course count.

    Steps:
        1. Drop rows whose district name ('行政区划名称') is missing or ''.
        2. Sort by uploaded-course count ('上传课程数量'), highest first.
        3. Keep the first ``top_n`` rows.

    Args:
        raw_data: Anything ``pandas.DataFrame`` accepts (e.g. a list of row
            dicts). ``None`` or an empty collection is allowed.
        top_n: Number of rows to keep after sorting. Defaults to 10.

    Returns:
        A ``pandas.DataFrame`` with at most ``top_n`` rows. When there are no
        input rows the (empty) frame is returned as-is.

    Raises:
        KeyError: If non-empty input lacks the '行政区划名称' or
            '上传课程数量' column.
    """
    df = pd.DataFrame(raw_data)

    # An empty result set produces a frame without columns; indexing it by
    # column name would raise KeyError, so bail out before touching columns.
    if df.empty:
        return df

    district = df['行政区划名称']
    has_district = district.notna() & (district != '')

    ranked = df[has_district].sort_values('上传课程数量', ascending=False)
    return ranked.head(top_n)
|
|
|
|
|
|
def create_top10_chart(data):
    """Build a pyecharts ``Bar`` chart ranking schools by uploaded-course count.

    Args:
        data: Table-like object (a pandas DataFrame in this script) with the
            columns '学校名称' (school name), '上传课程数量' (uploaded-course
            count) and '行政区划名称' (district name). One bar is drawn per
            row, in row order. An empty table yields an empty chart.

    Returns:
        The configured, not-yet-rendered ``Bar`` instance.
    """
    # len() instead of truthiness: a DataFrame has no unambiguous truth value.
    # An empty frame may also have no columns at all, so skip column access.
    if len(data) == 0:
        schools, counts, districts = [], [], []
    else:
        schools = data['学校名称'].tolist()
        counts = data['上传课程数量'].tolist()
        districts = data['行政区划名称'].tolist()

    def _channel(value):
        # Keep each RGB channel inside the valid 0-255 range.
        return max(0, min(255, value))

    # One gradient colour per bar. The first ten entries are identical to the
    # previous fixed ten-colour palette; further bars no longer index past the
    # end of the array on the JavaScript side.
    colors = [
        f"rgb({_channel(75 + i * 15)}, {_channel(115 - i * 10)}, {_channel(220 - i * 20)})"
        for i in range(len(counts))
    ]

    bar = Bar(init_opts=opts.InitOpts(width='1200px', height='600px'))
    bar.add_xaxis(schools)
    bar.add_yaxis(
        series_name="上传数量",
        y_axis=counts,
        # The Python list repr of plain strings (single-quoted) is embedded
        # verbatim as a JavaScript array literal and indexed by bar position.
        itemstyle_opts=opts.ItemStyleOpts(color=JsCode(
            "function(params){"
            f" return {colors}[params.dataIndex];"
            "}"
        )),
        label_opts=opts.LabelOpts(
            position="right",
            # Label text: the value, then the category name with a line break
            # between every character, wrapped in parentheses.
            formatter=JsCode(
                "function(params){"
                " return params.value + ' (' + params.name.split('').join('\\n') + ')';"
                "}"
            )
        )
    )

    bar.set_global_opts(
        title_opts=opts.TitleOpts(
            title="上传资源数量TOP10学校排名",
            subtitle="数据来源:课程资源管理系统"
        ),
        tooltip_opts=opts.TooltipOpts(
            # Tooltip: category name, district (looked up by bar position in
            # the embedded list) and the highlighted count.
            # NOTE(review): the list is embedded via its Python repr; values
            # such as None would not be valid JavaScript - confirm callers
            # always pass cleaned data.
            formatter=JsCode(
                """function(params){
                    return params.name + '<br/>'
                        + '所属行政区:' + %s[params.dataIndex] + '<br/>'
                        + '上传数量:<b style="color:#5470C6">' + params.value + '</b>'
                }""" % (districts,)
            )
        ),
        xaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(
                rotate=30,
                # Truncate labels longer than six characters.
                # NOTE(review): reversal_axis() is called below; confirm this
                # formatter still lands on the axis that shows school names.
                formatter=JsCode(
                    "function(value){"
                    " return value.length > 6 ? value.substring(0,6)+'...' : value;"
                    "}"
                )
            )
        ),
        yaxis_opts=opts.AxisOpts(name="上传数量(件)"),
        datazoom_opts=[opts.DataZoomOpts(type_="inside")],
        visualmap_opts=opts.VisualMapOpts(
            # default=0 keeps an empty data set from raising ValueError.
            min_=min(counts, default=0),
            max_=max(counts, default=0),
            orient="horizontal",
            pos_left="center",
            range_color=["#91CC75", "#5470C6"]
        )
    )

    # Swap the axes (the original comment describes this as reversing the
    # Y axis so the ranking reads in descending order).
    bar.reversal_axis()

    return bar
|
|
|
|
|
|
def main():
    """Run the text-to-SQL demo end to end.

    Trains the model on the table DDL, asks it to generate SQL for a fixed
    natural-language question, runs the SQL, exports the rows to an Excel
    file, opens that file, then renders and opens the top-10 bar chart.
    """
    # Local imports: only this entry point needs them.
    import subprocess
    from pathlib import Path

    vn = DeepSeekVanna()

    # Training: feed the table definition (DDL) to the model.
    print("开始训练...")
    with open("Sql/AreaSchoolLesson.sql", "r", encoding="utf-8") as file:
        ddl = file.read()
    vn.train(
        ddl=ddl
    )

    # Natural-language question (kept in Chinese: it is model input).
    # An optional extra constraint "only the Erdao district" was left out.
    # A previous alternative question asked for the number of resources per
    # chapter of primary-school Chinese, ordered by volume and chapter number.
    question = (
        "\n"
        "查询每个区每个校都上传了多少课程数量,需要返回行政区名称,学校名称,上传课程数量等属性.\n"
        "字段名:行政区划名称,学校名称,上传课程数量,\n"
        "行政区划为NULL 或者是空字符的不参加统计工作,\n"
        "先按行政区划排序,再按课程数量由高到低排序"
    )

    # Generate the full SQL statement.
    print("开始查询...")
    sql = vn.generate_sql(question)
    print("生成的查询 SQL:\n", sql)

    # Run the query and export the rows to Excel.
    filename = "d:/导出信息.xlsx"
    with PostgreSQLUtil() as db:
        sample_data = db.execute_query(sql)
        save_to_excel(sample_data, filename)

    # Open the workbook with the platform's default application.
    # NOTE(review): the export path is a Windows drive path - confirm it is
    # meaningful on the other platforms handled below.
    system_name = platform.system()
    if system_name == 'Windows':
        os.startfile(filename)
    else:
        # 'open' exists only on macOS; other desktops provide 'xdg-open'.
        opener = 'open' if system_name == 'Darwin' else 'xdg-open'
        try:
            # Argument list (no shell) so the file name is never re-parsed.
            subprocess.run([opener, filename], check=False)
        except OSError as exc:
            # Opening the file is best effort; keep going to the chart.
            print(f"无法自动打开文件 {filename}: {exc}")

    # Build and display the chart.
    processed_data = process_data(sample_data)
    chart = create_top10_chart(processed_data)
    html_path = "top10_chart.html"
    chart.render(html_path)
    # Pass an absolute file:// URI; opening a bare file name through
    # webbrowser is not portable.
    webbrowser.open(Path(html_path).resolve().as_uri())


if __name__ == "__main__":
    main()
|