From ae86048959e7d03846cecf5b491a5f430de53958 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Sat, 15 Mar 2025 09:00:02 +0800 Subject: [PATCH] 'commit' --- AI/Text2Sql/Model/__init__.py | 0 .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 141 bytes .../Model/__pycache__/biModel.cpython-310.pyc | Bin 0 -> 2390 bytes AI/Text2Sql/Model/biModel.py | 70 +++++++++ AI/Text2Sql/{YunXiao.py => Train.py} | 0 AI/Text2Sql/Util/SaveToExcel.py | 80 +++++++++- AI/Text2Sql/Util/VannaUtil.py | 107 +++++++++++--- .../__pycache__/SaveToExcel.cpython-310.pyc | Bin 1805 -> 3076 bytes .../__pycache__/VannaUtil.cpython-310.pyc | Bin 5226 -> 7612 bytes AI/Text2Sql/__pycache__/app.cpython-310.pyc | Bin 5712 -> 3105 bytes AI/Text2Sql/app.py | 138 +++--------------- AI/Text2Sql/vanna.db | Bin 0 -> 20480 bytes 12 files changed, 254 insertions(+), 141 deletions(-) create mode 100644 AI/Text2Sql/Model/__init__.py create mode 100644 AI/Text2Sql/Model/__pycache__/__init__.cpython-310.pyc create mode 100644 AI/Text2Sql/Model/__pycache__/biModel.cpython-310.pyc create mode 100644 AI/Text2Sql/Model/biModel.py rename AI/Text2Sql/{YunXiao.py => Train.py} (100%) create mode 100644 AI/Text2Sql/vanna.db diff --git a/AI/Text2Sql/Model/__init__.py b/AI/Text2Sql/Model/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/AI/Text2Sql/Model/__pycache__/__init__.cpython-310.pyc b/AI/Text2Sql/Model/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c7c72b0ef40d070d990dd1c79ff8d1517bd3d49 GIT binary patch literal 141 zcmd1j<>g`kg1cw0q=V?kAOaaM0yz#qT+9L_QW%06G#UL?G8BP?5yUS87ps_*;_&>U z?3lpJymX)Zy!04H&zO+ZiV~yX!kif2{FKz3nE3e2yv&mLc)fzkTO2mI`6;D2sdgZf KikW}}3j+XPYai19 literal 0 HcmV?d00001 diff --git a/AI/Text2Sql/Model/__pycache__/biModel.cpython-310.pyc b/AI/Text2Sql/Model/__pycache__/biModel.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d3db5a1a12c2a29f8ac23b95dec0dd3aed9a6b4 GIT binary patch literal 2390 zcmbuB&u<$=6vubGKfGSA?ahz2P=2)h5|~0l)0UQ~LQR_%BBc?n9U+Y(jn+F8cj^9c zXV+ESYq<8#zrckfSN;P|oaP9p{tH~-y|Eo{ON)@O3y#CarZQ&gowDXn0UEcWK;0@6^HfaF9$D80c1p$8v ze4n?#Z)ty-w|hpn^9*KZbct%*3=45L=OXDksuf8QmV$|F7-vE`xAJ71W~#oEm0~EQ za(bnVv!U{PX_zGQsI?7!AXUIcl!P)Y+AFA=5`Tt5$;~Re0Af>wM(CWapU4=YxsA0?drwNE0-^AtQSwdd&2nO zPS;dkI4<)jPt&+m4dlaQTs%@yfTOlDVFjFp&t>Pic8OO!{|VlEv7cRo>B*KSebri{5V zVdJQkK`2@DAe1FLVv*jVMKw4%37#FrV%9Q(48t65ci!Rj;@aad8AH42UiW4j7KiJI zqevv|FowIp*;YrX1Q(eW!9a>p4l_K7NLVyOHD?E`j3HYiE?6-sN|7oL5GIg_sQl58 zU1+M!pX=M32fX+;mk$r_!NJ?jv*E$k&Ou)sl^eZB$$>t&2ZLDuUmHzi8@jH*uXrDf zYgnXR3rL&zC*R$ zoB;#dvXrXBSeRuwY*jdL1!H<3WyV?g>cReXoW(Yn(#JU71u!^?0;la6?unYHaMcTl zZu@Fwe})7LWf!FoR*^syHZrWwnjt<@%=Br>W!Erm8mwFzC;b3nBTUD1qc6HqvKPT~C#hr_)$l^E51 z^?LsYaaUgucbfJSO#3MsJ;2)k9Ob3bu0zsm@S91yII=GiNQ-Uvx+}^ZO?a4!Pagtb;)A5X6XP4_^`5m%*KC`P8JNMT)!=VD C-6SIb literal 0 HcmV?d00001 diff --git a/AI/Text2Sql/Model/biModel.py b/AI/Text2Sql/Model/biModel.py new file mode 100644 index 00000000..ed21772b --- /dev/null +++ b/AI/Text2Sql/Model/biModel.py @@ -0,0 +1,70 @@ +from pydantic import BaseModel +from sqlalchemy import create_engine, Column, Integer, String, SmallInteger, Date +from sqlalchemy.orm import declarative_base # 更新导入路径 +from sqlalchemy.orm import sessionmaker + +# 数据库连接配置 +DATABASE_URL = "postgresql+psycopg2://postgres:DsideaL147258369@10.10.14.71:5432/szjz_db" + +# 创建数据库引擎 +engine = create_engine(DATABASE_URL) + +# 创建 SessionLocal 类 +SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) + +# 创建 Base 类 +Base = declarative_base() + +# 定义 t_bi_question 表模型 +class TBIQuestion(Base): + __tablename__ = "t_bi_question" + + id = Column(Integer, primary_key=True, index=True) + question = Column(String(255), nullable=False) + state_id = Column(SmallInteger, nullable=False, default=0) + sql = Column(String(2048)) + bar_chart_x_columns = Column(String(255)) + bar_chart_y_columns = Column(String(255)) + pie_chart_category_columns = Column(String(255)) + pie_chart_value_column = Column(String(255)) + session_id = Column(String(255), nullable=False) + excel_file_name = Column(String(255)) + bar_chart_file_name = Column(String(255)) + pie_chart_file_name = Column(String(255)) + report_file_name = Column(String(255)) + create_time = Column(Date, nullable=False, default="CURRENT_TIMESTAMP") + is_system = Column(SmallInteger, nullable=False, default=0) + is_collect = Column(SmallInteger, nullable=False, default=0) + +# 创建 Pydantic 模型 +class TBIQuestionCreate(BaseModel): + question: str + state_id: int = 0 + sql: str = None + bar_chart_x_columns: str = None + bar_chart_y_columns: str = None + pie_chart_category_columns: str = None + pie_chart_value_column: str = None + session_id: str + excel_file_name: str = None + bar_chart_file_name: str = None + pie_chart_file_name: str = None + report_file_name: str = None + is_system: int = 0 + is_collect: int = 0 + +class TBIQuestionUpdate(BaseModel): + question: str = None + state_id: int = None + sql: str = None + bar_chart_x_columns: str = None + bar_chart_y_columns: str = None + pie_chart_category_columns: str = None + pie_chart_value_column: str = None + session_id: str = None + excel_file_name: str = None + bar_chart_file_name: str = None + pie_chart_file_name: str = None + report_file_name: str = None + is_system: int = None + is_collect: int = None \ No newline at end of file diff --git a/AI/Text2Sql/YunXiao.py b/AI/Text2Sql/Train.py similarity index 100% rename from AI/Text2Sql/YunXiao.py rename to AI/Text2Sql/Train.py diff --git a/AI/Text2Sql/Util/SaveToExcel.py b/AI/Text2Sql/Util/SaveToExcel.py index 31c497ad..e9d85010 100644 --- a/AI/Text2Sql/Util/SaveToExcel.py +++ b/AI/Text2Sql/Util/SaveToExcel.py @@ -1,6 +1,9 @@ +import io +import pandas as pd +from openpyxl import Workbook from openpyxl.styles import Font, PatternFill, Alignment, Border, Side from openpyxl.utils import get_column_letter -import pandas as pd + def save_to_excel(data, filename): """ @@ -61,4 +64,77 @@ def save_to_excel(data, filename): column_width = min(max(max_length + 2, 10)*2, 120) # 加 2 是为了留出一些空白 # 设置列宽 column_letter = get_column_letter(idx + 1) - worksheet.column_dimensions[column_letter].width = column_width \ No newline at end of file + worksheet.column_dimensions[column_letter].width = column_width + + +def save_to_excel_stream(data): + """ + 将数据集保存为格式化的Excel文件流 + + 参数: + data - 数据集 (列表字典格式,例如:[{"列1": "值1", "列2": "值2"}, ...]) + + 返回: + BytesIO - 包含Excel文件内容的流 + """ + # 转换数据为DataFrame + df = pd.DataFrame(data) + + # 创建一个 BytesIO 对象作为缓冲区 + output = io.BytesIO() + + # 创建Excel工作簿 + wb = Workbook() + ws = wb.active + ws.title = '统计报表' + + # 写入数据 + for row in pd.DataFrame(data).itertuples(index=False): + ws.append(row) + + # 定义边框样式 + thin_border = Border(left=Side(style='thin'), + right=Side(style='thin'), + top=Side(style='thin'), + bottom=Side(style='thin')) + + # 设置全局行高 + for row in ws.iter_rows(): + ws.row_dimensions[row[0].row].height = 20 + # 为所有单元格添加边框 + for cell in row: + cell.border = thin_border + + # 设置标题样式 + header_font = Font(bold=True, size=14) + header_fill = PatternFill(start_color='ADD8E6', end_color='ADD8E6', fill_type='solid') + + for cell in ws[1]: + cell.font = header_font + cell.fill = header_fill + cell.alignment = Alignment(horizontal='center', vertical='center') + + # 设置数据行样式 + data_font = Font(size=14) + for row in ws.iter_rows(min_row=2): + for cell in row: + cell.font = data_font + cell.alignment = Alignment(vertical='center', wrap_text=True) + + # 动态设置列宽 + for idx, column in enumerate(df.columns): + # 获取列的最大长度 + max_length = max( + df[column].astype(str).map(len).max(), # 数据列的最大长度 + len(str(column)) # 列名的长度 + ) + # 计算列宽,确保在 10 到 120 之间 + column_width = min(max(max_length + 2, 10) * 2, 120) # 加 2 是为了留出一些空白 + # 设置列宽 + column_letter = get_column_letter(idx + 1) + ws.column_dimensions[column_letter].width = column_width + + # 将工作簿保存到 BytesIO 对象 + wb.save(output) + output.seek(0) # 将指针移动到流的开头 + return output \ No newline at end of file diff --git a/AI/Text2Sql/Util/VannaUtil.py b/AI/Text2Sql/Util/VannaUtil.py index 3dfe7df1..d7e0e5a1 100644 --- a/AI/Text2Sql/Util/VannaUtil.py +++ b/AI/Text2Sql/Util/VannaUtil.py @@ -1,61 +1,128 @@ import re from typing import List, Dict, Any import requests +import sqlite3 from vanna.base import VannaBase from Config import * class VannaUtil(VannaBase): - def __init__(self): + def __init__(self, db_type='sqlite', db_uri=None): super().__init__() self.api_key = MODEL_API_KEY self.base_url = MODEL_GENERATION_TEXT_URL # 阿里云专用API地址 self.model = QWEN_MODEL_NAME # 根据实际模型名称调整 self.training_data = [] self.chat_history = [] + self.db_type = db_type + self.db_uri = db_uri or 'vanna.db' # 默认使用 SQLite + self._init_db() + + def _init_db(self): + """初始化数据库连接""" + if self.db_type == 'sqlite': + self.conn = sqlite3.connect(self.db_uri) + self._create_tables() + elif self.db_type == 'postgres': + import psycopg2 + self.conn = psycopg2.connect(self.db_uri) + self._create_tables() + else: + raise ValueError(f"Unsupported database type: {self.db_type}") + + def _create_tables(self): + """创建训练数据表""" + cursor = self.conn.cursor() + if self.db_type == 'sqlite': + cursor.execute(''' + CREATE TABLE IF NOT EXISTS training_data ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + type TEXT, + question TEXT, + sql TEXT, + content TEXT + ) + ''') + elif self.db_type == 'postgres': + cursor.execute(''' + CREATE TABLE IF NOT EXISTS training_data ( + id SERIAL PRIMARY KEY, + type TEXT, + question TEXT, + sql TEXT, + content TEXT + ) + ''') + self.conn.commit() - # ---------- 必须实现的抽象方法 ---------- def add_ddl(self, ddl: str, **kwargs) -> None: - self.training_data.append({"type": "ddl", "content": ddl}) + """添加 DDL""" + cursor = self.conn.cursor() + cursor.execute('INSERT INTO training_data (type, content) VALUES (?, ?)', ('ddl', ddl)) + self.conn.commit() def add_documentation(self, doc: str, **kwargs) -> None: - self.training_data.append({"type": "documentation", "content": doc}) + """添加文档""" + cursor = self.conn.cursor() + cursor.execute('INSERT INTO training_data (type, content) VALUES (?, ?)', ('documentation', doc)) + self.conn.commit() def add_question_sql(self, question: str, sql: str, **kwargs) -> None: - self.training_data.append({"type": "qa", "question": question, "sql": sql}) + """添加问答对""" + cursor = self.conn.cursor() + cursor.execute('INSERT INTO training_data (type, question, sql) VALUES (?, ?, ?)', ('qa', question, sql)) + self.conn.commit() def get_related_ddl(self, question: str, **kwargs) -> str: - return "\n".join([item["content"] for item in self.training_data if item["type"] == "ddl"]) + """获取相关 DDL""" + cursor = self.conn.cursor() + cursor.execute('SELECT content FROM training_data WHERE type = ?', ('ddl',)) + return "\n".join(row[0] for row in cursor.fetchall()) def get_related_documentation(self, question: str, **kwargs) -> str: - return "\n".join([item["content"] for item in self.training_data if item["type"] == "documentation"]) + """获取相关文档""" + cursor = self.conn.cursor() + cursor.execute('SELECT content FROM training_data WHERE type = ?', ('documentation',)) + return "\n".join(row[0] for row in cursor.fetchall()) def get_training_data(self, **kwargs) -> List[Dict[str, Any]]: - return self.training_data + """获取所有训练数据""" + cursor = self.conn.cursor() + cursor.execute('SELECT * FROM training_data') + columns = [column[0] for column in cursor.description] + return [dict(zip(columns, row)) for row in cursor.fetchall()] + + def remove_training_data(self, id: str, **kwargs) -> bool: + """删除训练数据""" + cursor = self.conn.cursor() + cursor.execute('DELETE FROM training_data WHERE id = ?', (id,)) + self.conn.commit() + return cursor.rowcount > 0 + + def generate_embedding(self, text: str, **kwargs) -> List[float]: + """生成嵌入向量""" + return [] + + def get_similar_question_sql(self, question: str, **kwargs) -> List[Dict[str, Any]]: + """获取相似问答对""" + return [] - # ---------- 对话方法 ---------- def system_message(self, message: str) -> None: + """添加系统消息""" self.chat_history = [{"role": "system", "content": message}] def user_message(self, message: str) -> None: + """添加用户消息""" self.chat_history.append({"role": "user", "content": message}) def assistant_message(self, message: str) -> None: + """添加助手消息""" self.chat_history.append({"role": "assistant", "content": message}) def submit_prompt(self, prompt: str, **kwargs) -> str: + """提交提示词""" return self.generate_sql(question=prompt) - # ---------- 其他方法 ---------- - def remove_training_data(self, id: str, **kwargs) -> bool: - return True - - def generate_embedding(self, text: str, **kwargs) -> List[float]: - return [] - - def get_similar_question_sql(self, question: str, **kwargs) -> List[Dict[str, Any]]: - return [] - def _clean_sql_output(self, raw_sql: str) -> str: """增强版清洗逻辑""" # 移除所有非SQL内容 @@ -119,4 +186,4 @@ class VannaUtil(VannaBase): except Exception as e: print(f"\nAPI请求错误: {str(e)}") - return "" + return "" \ No newline at end of file diff --git a/AI/Text2Sql/Util/__pycache__/SaveToExcel.cpython-310.pyc b/AI/Text2Sql/Util/__pycache__/SaveToExcel.cpython-310.pyc index 589d87623db85111c7e76021b48e3848652fa28e..9744971e15abde8ff1dbb96da9b75b2dd658a352 100644 GIT binary patch delta 1562 zcmY+ETW=dh6o6;;=Jk4QCv_6%){?d=U`mUshEix7+L8ncr46K9WPyy6*-|I#tTQ{N z4a~}*q*Ov!qAcQ}Ag~`Qu#hV8P$X0&egSWNTJZo9Nz(_!U*McIjl!<>+nIBlGnX^X z%-UeaOQ$UYPy4sK@1D$E&x|(SgdQV|x}@0s)1m@>#Z{|XQKKkRUA<})jcTHpsG3Ey znk*)(R?&)enwzTHMH|{WGuB8k%@WLne}>UnQb^v%!Oj(EWWM3?#W~MgEa;Io?JbEY zHB%PCx&Gn!kGJnCR)N1Y#qH?@6v6*U@a zw2PRJcCq5RTo3=$CUq0V;bZ#Q!%<3Hf{=3_XAb92#^&>m>qeIO zAIe4PdC#p^mrAY!v*aDZXT!hsX=;Y!#%$v?h%-+C51t-qV3yCp=YI_vL0VLjHVvpy zT56jFiqKX`ph|T?4>U%lRx%#gEF_r1)Fz!IZzlp>YAsXhZA!`Vh)4>HX-sd@|NR@= z1k7k^fdNjbHh}=SKxYZ5E!a|L<`7v(50O$v>Y&ksgfwKLlwE#d`IzWoNoKKBQwdDb z%WQblEYnnjq|{h;2qR^|-URy{G6~FyKGp+oZ&Q(pMyY@K9SC8u9ZecoFkFh|SYJGY zUYG$6XM?+i?y+N%^`j&BHI_dR1ClP}S8={iyrl%Gz!rm&N_&X3hQx52wnoG#aIj6p zs2G;^Duq`G(qbprrCBaNMcBZq!g76tt;ZNbSNtp5@E%DB35`=jtXrP{E@;Mx53i}kO*`RA7#aqsksaQrjp#}16G ze|mNOvmd6ez`@!0^4ec_ep>(d>iVsp@7-7f_t>^>!^Ypc^X=xX_QoeSH{0J&7br&= z&mgjhUc>+bPtFCN$L1hngfxJo*xZTOjo5?Oi+C9E2;xyh0kI$PIH0hXAHeoOK%`W> zDDhw397`%P%JZV~o)hV!B3vi3u!uyx<~ly8YBgtxaV)V$Tx0w&MjA(8rSKz&rx07k z#*bq6X@rh=25}5=9B~3Mfp`{i5^)M~8i8fWu|)VJ0Pcn|WTyF!v)Cx~@F`S0j|#)9 zi&|Yo%H=uu{BAO5Q2ip}CB#|8%ZPJ`^N1P5D~MMSvs<_Tt*8%&D+#aU;6|2w!JTq- z8Z`b4feo@!rfR4LHRyn9hf~IhhNW6mqr40zg)Dy^V;Hsa5-a=NB~OB+yUJM)Rm(j#TgtNpHk-$Zo5!mjtGmtwo*N$zlu1l# L(^2?L)t>klvxI|~ delta 495 zcmY+AF>4e-6vuaVXLk4Qa%Uk?i;zYvMnMRHU=ak9QyUR6-9tDwH>0=kX6JajrzVX} zT8l89-@ww^k02I;AS~F}~>Zxb<-|@#FC@;WPj9?ai;j8fr%3!TneenAafF=Q=mlbrI@<{iuFCi5(Y*Zb&Wu*-5FXPzGnA!b=u2QkEWMxeQqItG@U+pe87mu+UOwV-aaG4g{S)5H(iB&`PNcb z&nwh|ZgBKf%Ay4nu5M=J-|3Wg!$Q8;R$Ql?r61ItrPymBe3y4_#tF(|N51Az)*6vy z7^tZ#xD4T?dbSUvZo!eKo{S~ByT<1h5k?DJacLH3{~3;e@I&oghw-E&jdwLf3_v}|mLIM|R>P16(saR^CsdvgiIfReg3a$*ZeG)_-t?PDV&OJbj+ zfHk56h14d2LM9|>5=g8Bnvf2VHf^DpCjUYo`gEQ;Q%km=?gKA(a;Lrht$k!!lI`}s z+?gxQ+1grr?Y-At-+P@*bF*K<=b=CSdGs&qCF!s9r}m@a&og+;C>ka)IVHvDS4qj^ zN=zA7W9qmT(`0AN6>|w3cg&4nHRT!i#=K%oOZlSGa}skg_bG|F4ev=c=4bMt6!I)! zWhoL;ZLKe9TDG<)nXqiNCp{68ZQuTQIvw8?Hx2xeHzf$Q_T#DiF~?*{GNhQyq?p1U zpOi*qrZVl6FQz(M#x&+(-cu3?^cimEXUd4inpohJ7W1&?Dx`(20mRE%0cop1-K?G6 z1E^0x{e~Me*Rp#7@z+2)*nNOBF~1Qou%6K}qI61~(ul&=u}+MvVO_MI(TdsY*#7+C*7bRJ;l!BzImWI}4xzTz?CA41A`U*1o{(_S2V$^FY zNYeqD#U$*jMDpHONF}=2aH!dKnYpaNZLh8;(@9I$?dHAxd%}Hs&-1ZpH0D;$Ym;4?=zgty}KybaA& zD73C8QgPGN^{dj?TlYLQ#LNR3erV_gEZ>(&j}GWl*0#aJsUhO-kgx#ib!R6y z87nGqrRUwu&i`4#TPpzY0OOly=ICMQ}W`SttK5NtNXEdzQFo|A}z#O0@gA+O-4?QxXu zaodx~q|-*i5*)TQ`e(Q332wwKL&wyVVPXSwB9Y0CZnORSh5DSM13+Y9w+N{8%D0KAyA|t>YWOY_yII zfPE8>89?J#$n*Gpe!0b$UsvOQ>P=8(43UBxR*t6=;{`Q}XNwdvrVJ$+yQTWPfmp+sjzc~L z#Jg@Zex(%-Bj2{fk_v{cvP@!C9`J|6t~uhZbGGu^ljY*a{M!K7&6t`Shvvu0d?Nf| zf$WgD34Q(@YE}@M-6nRgo&5}$ zS(P`f#GA#1b*%d=&`n2(?IgsmdIr^^t}4j8v}cteRb?htFcrREW3E#YR9q`cd!&B-ty$uR};QQf$u!xbtr@;nU z=(H{SR}4%hHn~e~@dOBb*+T)HAz9szH0v-dZcw_aIH6cr{#C_Ry0_XY&m3v+1h#Mc zsF5~~X8DfiX-TqW<#>mb-?GH7QNz-?kwP>U=Fz=9l}W@?=8o>_)T@p^S~5Ia8_Oiq zws*v^kZYwnaG+NvVTfsn8H*jx-zDcnxB@$X^oqL%52BRAGyysp*AxFW2eE!?^c_k?OAM8j;JwF$%FS zQ)aLYq>F=c?EZj+S{RyDHZ0l;L6~a^A9X3%1RS#&BArB zjzeTo$X1<JFdZl#!jfKf`j)GLgNIuV@xWW9}$xm~- z5*g#e24lExjsW+ruofl1VTs7)h^rFE_hJbUMmLkHdz`&*RhjD`h{m*M*W3P%TqTi)N47@;GEoG1cVa3 zwmY898fnJ&<4+^O0YYCSmjglQMu(E`SmJ#x%BsY2au9V)ExmTNeERhYSw2)h;85df z%b|_Oh?tcqYO7){lJyFql7p*tIm@tYcb3ClFE=Vu1mSilwH{imm+^V=LNPvzZ&l4N7;Y$OBDCKA`09TViD$ zj_u*`Bb0a6F_on6!6S6{AUc*b27>86gr0&&-31Lks~tmsT4P#)U=?^#JqCZuw_!x4 z5-1W)7xNV5qB`wHAwm;td6}<3Jw#}%f}9=29%6^zDK>ocLFvxzxzkhSTd$UX`_97g znfWi@$hZ8cd(+O1JD+yqamXhAxJFX(Q8VAN(E$#b-J5s*M<}ljnVY|sgA3%$@`38$ z=FsN6rxMOX9$VoCZWg&=+u!?Kq<1|NYCV~Y|i=#7BE}Tbg3PRCa+W0 zqSfjG+Oj!yA|i`kG*XiiYN}aIgNa4?eIz~wYSl|rztMz-fuB?aThy=|Indc#SIX0O z%4bf`&Rm(j_4@oTPn3WCQ^(IgU#W{7PL)5rGJoTv|Ge{_zgpWs za>SS=9C+e|kA4A!+|v-k65LsS{*zOssdsCqKh_;Ao%?M5%Qs7tw`Xr1pS|^2d3v%m z`LntAPB{Xbzj3W}?n-sLZQa4SnM=61&E7f-!e{^Z)!c44PPs&mg}K6IeTGi{d!$}Yah0;_iwbO}kPU#p5 z^3&deyC@$()}cw3Zz3v7$2_u>Z?*ia#cC>ePC~7zyemrwq?aVN=9uRv9@aWo=^c@e zNTD`I-OryXU%TQc+u#d*dCx9bv%!P;Rs^C{G9ii>kB*s{bV#wAdveyn3{U0-G#}XQ zl=vQrQgz*1iK8Ze35*btWAwdu>GOx3$tg&LQ|g0~dlLdS-U-A}XEJR6c>JhtWeyo> zGuDjyaxP`*BN+}y0Vhs&Z@ZFd_yOCWjq~`pVHw;6C7y#uoS}<*C`hy2j@K!c7DXyr^! z;ej8fF<H!pr&$J>%ziuwT^XX6#Fu{`jjau5GAT}0K!GL`w|IocsN%p6P%}ggW!`+48sesqfjGY1TzTb^Fj6U9 zui+3~4#g#hQa*$MU?VNYauo- K%B@~`t@;m&*yZ#9 delta 2665 zcmZ`*TW=dh6rS1LtS_~lc;h&6oNLpjb<&2?(hHs2=Begt6`vl^lm3YZ36%R;UegKFcuQa|1fmLfV~BGj!~jXSKMiZr~+#i9P-3rmze zkwHX?9$Xr+7&E~m172RDGugn3jW^m|5q$uI(J;x}QoRaxEWhG9A_uS-kaJBtqX-P4 z*k>dyjkIq@+hPR7dVV)WyQ_kM_*g@OX&r7%hlf7wrsI(U2~pb;QdCwg2)7?riDM{^ zqma&NP3LyeS9#clwpZ)i!^li;)$Suvu!;%k3&xL3$b_Int=cg9j4*|%8|W6jMjq4W z6l$Z*hcv#2WvBypF2MbA1VkBje3tkUs|sDm=mSM4Sm7MmR8|$NZ!-?8vB;yykPEWh z%G8-qtN6USKf&?KHgK}!R%=%TiXPYxp`Gu zI?LQQ1-C7qOO{A)M!>>)4pldpeSp9_NfIYu8Kgs;gy*3N5d~be9q>^Y?*t5^34Gnl zByk$BVZ^6Etka9&#aQrb^2l&2K=B$*zmB49Z}3lYNS6_g1o71QX)Fr1zvLE@47s&H z1QeYVs7y{lX%c6DUV@>nY!az1*`d?*d@$Xa5B@MNEH;}n)V(ziiiD{auFYjgptKA2 zK!?hfJP+e`|KK%DHMf~n?s-<3BZH+0g9d*0xw%#wYITnbaj~flm5M`ZU0Tw2``cf4>gEawM^|6RD!~_kTo_%k93;l(#SXQ(bCm*txu({6xn1b_Cg8Q2Mhguo+A zq)^-oKFLq&77iDKr}@FF($(Eh#pf^+YVuf&3hvch2e>weAm#gbDBc11)8Oy?hcq9| z3`{gu;9bZ*9>dW%3XDsfLxIgH@B|WAbMXd>w@|!|Vg|*#AVO`)b!~AChaJndYx&|9 z&ds7&L~#!V+76Q@WD^Kn;y3Y32MX)HLDu%>sJj8pYYSN z>JUCt{f59Dg}C?t9-$6LF)(7k^Cg%o#vkJUkEtaKGK2?wus)bNyIVFnLKnj5<*Li- UHotK zJ2T(+`)(8Ga#;e;qrd-S_FfMmALC%}m+p%q%8tPjThJYBKa02^Gjfqw+} z2H4OGdsqAlGUwJR+_Y$W!US@fAoV~=%?1{S(Y*-f*MHZ86cWKBMCv%CYw`U~nE< zf{wu`GklP_U{0!Z2vWGSd?|O42SNPx{IysF?Dz%|IpBQWJv({f>Nn3A~b^C^(sX1-d&#k`=N{ zma=v72BG8{sgrfu0E&^R4*e!6(8+?Kx?9*y#GV@{+Y4e1nZd3HT~U7S6Y%Ni}{{ZrpuU0 zd2tx}Ql$m2fNXOGQ=l$d9M#3HmpcMJx0hDD*8@vP-dR5ie`y2;j4bG%r4|)qFt>DY z4+1}J?^gENvg+D1`i$xp{M%6-)FP5LKwU|1n00a!lwP+Q1RyXNsSz3t{Hb} z9aP`2CO>@uUhr(7t#v+%Wxf*EBDP_uu0*=BmXtlPf=V$iU5J2`vOyxTD2Cx|m8$rG zNvCest-8HpQgZ7E?3>X}VytKD zrgj^1iU_n^p-Y^AkJenAMCwb}eHlBHRg6HVY+v#)QRxKml2V96xH1Y|A=Lz><2o7{ zW!~hA%AR)vK=lQbdTUj-*L_aIF3AvdO`L|ng}SM6yX8nlSomTOxd^NOr-uM$oxlO) zAPZQ`);TLRDUQK@B9Gm1?7E>_I?{0?2wN^W%O^F@gWOiB1;Jv$`QPxNi{W2Q#Ly=` zgGZC06Hea&o&-;B;(;OMP;m-H8QK$2n0<7%_AukMAM_{(nvNgzC`OlQ9WWY!3@9P* z)ky==hcp<(&x%2bgv$E%=rZ_MGkq*u(yl{oTDD~he93I0!%}PJHDW-69^S4L4j~ks$&cG%+4b5Lml-*Yi%kIzS}7(+!}2!l#2LRTiQ#FdKgaTa^EkVmqZ zR59L@InHGm6!mv$@O&IeSbXJz59K!TajO~z6hw{d&EbEqnb#Jo? zO>rSe7N7uSL(Y7+EElU>rRU%PE|Nl88S|0qp5+l27(1Yova7<6q)5Z&D#sVNS8L{E z$YSUL0$l;36V+8Y%=K|pu8KIULjH&Un|aMM|t{;`lOKHILEVQNcsiJ=U*CEKJ;UZ(r!7W5t)mTWC z5Vpr>VeQ$F2GTt79`8VR2ABAio=Ub39xuf^7umP&Ejw0zysl zV!PBP8U!`gzDgAFCR5yh`)LArXg|voT!*G(yj%quOf|B9y>l%R89X}oLoDgV>%RU% zd97BJ38?AYiM|b>o}g|2o?3ifS5aLlm5G+b@}SIWZ9m&poh=3CK{1XUW&*GvB;hvH z=u73Y_=a8(wP=q4b?)uK1?QV^@2Ca*+n6qrSD|y@{xk%+w*5LZWom=k-I+0}o0F$b RZj3tFKI-ICbeLNI`7byXGLQfO literal 5712 zcmbtY>vI&x5#Q(C-b2z!fCQL_z{chPOCaz|CNTo>suD6FVd5>4t<%j&9PB-q-NQ&Z zRbh+7#74$;NSv}$v6OOfNX1}RIWB^!_+Lm>zU@bwlf)-~_lG#Yo;#fm5U!-M(#-YD z^z`&}_jLD6zM&zi;JbV0UnBchD9Sg~sQ)Z9cA{5HF?6cfu*;cz1aKvuo7H^wS?d3edqAbdnv)H6=ui#A+ioJ5b!s4vq zg2Ed3$_d?m1noxFgmx1Tp}h+21ZzgSS+-Ym4YStpHB)O@%e2atu%#Ci&~9hTSnCDF zUdPv*TF=_}Dz{oIP>D06&}A zMwSHsTlkY~(`il2@K)@zgKwEoAVYOPmgMHUr0f2q9)$O$Jiq7g!Jy&5;NaoC=hD33 zXY#oq+{+6*$GoH(M0Te}Dwj(g^E2+)fkNK%M+EoIxSI>!XgXgQ>FVz6obT=K^)ig7`nK(Ort7I^w?F;d zu5DX8=zmAYGuyhK+OfT>(;GWA=CGlCkU%t5^z-R_HkzMugjSeA9n55*NzJyGi zM~d8&R?*-)LmB5xwK)jw#R?*mO^H$GG#?F2h>V}R`#of{4+^w$Mb}LYxm>`9C`nDg z(I`Qf@!?d_^)m{5OwhdN*wLeV`v;xDgNODG4E7v4+@D+#SdNoRWx3-7QOC*VSEPLO8Cw> zCoRp`tM44Gb*?OAcoj1Z3L|-JcYZ?a!r0lATLfO^Kt!unP0KZL94pruh9S#s)vr?5 zxj<>5Ga+~(4-RWRB1T(@RZpRx$?_nQ@f>f|^LaLi0t5ptPy5#tu@$JY`ffvS_X*}5 z&x_M1j^K3m<#Qt^dJdi#6hNl5j}E5ebJS9F?#o z;h2OYJ1g{}vM(#z@L!(u(4e(@-^Nk(D=RgY8k1XUZUN>a6ns?Ahwp{j$bPE^Sjv5Mf;RINb;WAsJP z7N+$^CC}DDfaE=ltCulO#v}%(RG#NAv zn4BQomW0U(nvArqLX!lOwkr8a`HFIMV$U6_a)! zF7N+Bxl8X;ZiVc2BD;esxxiEZ=PHY;_Gu7(0i~+ig(G`E1l1DmcWUPY+SSm%hiH%+ zWh&D$Brm-`2xk$LQzLw=;Q)8t{FZij5|^#JpAqSA$k2ia&JA-!KJOnTdPLaL#B=z$ zXHg0Zv=()GB;HpQUzt$Gm9c0^c?k*2aiye8sne9YC<8V46(y0OG{Sv^`{T3@#RBboNY!BRt(y1|nq#rf`B33?f2!~BjMa3(k1qD8UgsA{O9 z7ky*P>o)!K8g_<_2eG==16wxgw~Xy5zB-L<`syjYq@2cPr;KR@Ti2LUR79I^k|iN6 z(q2{1*aH z({K)rlJ=18>Lq=V?W$9uX=N%rt@J8CZ9Jik8{=ll1fD83`&LOW86|TEuYyiJGB1RGF9>@OnC}bGLnFIUB|{aV z$PiYOOrXnjO=c|jOR1?SU1H6?3ZUZ2%s|R%sSFg_`Kl!y!|20JZ}AB=#av zt))#u`i`awLQNjy!13a!Dt1F>%2EZnma(?F6E(tf92Xt#AlFQjqnrOoX;k*0c(fK% z$B~nat0jo44bBg4M4y1m@X2{7({Ll`fzz1KDsC`o_7@|d7;RtVoB8lhb2t9jz3AAs zZ_dA3zWh%4=ETf=-S$~qx|>J%eSsBI-?6SW^P=*bMr6dPp-~f|J%2> zFIAeeAHOsE!F4q6+_3zzPs(c|FOQl_L+;nojd;v2xa!# zE$}q^*Q-S2=4AQuYvnhlX21M!?)r3%2uTJKn3;MVyPlc6UcU8f%sgDXm$q+vx`tb9 z0)OS(=gS|zSy4|%M@MlP`ev_uICJrBtWxQ2A2`z29mSQ@#HA|d2inXRbtZj<7@)ejX{k@Fh>!*y8ihHj#VcdBiq@X=O;rV(B z%o^8y6MjH1X})Ha(vj<^fAbm2Qcl+Q)^10sLz7D!D zs$$G6`om8?OIKT-LxjRr3b4+YJJ!C~%>7j#K-0&XJ5Oxd*stQ%!4ytPCAWp=GqltCDTaT10V7<3g3LV~;b6idd%PiVT4$2(16P-qh` zQ-|q`R3;Z_f(PMYT*&bZ6NGwS9PH`q3nDyM%u2Tun24I0LJ%&V&7|`p7wDpxOIm?J zH%;*~A`}DHh{Jf;fi22L0kMSxH%=p7E>kFQ-wV`C5GI2JS|xeD*AxRI3x0kNiZVB~ zv;^{yRo@%BriQ*xge!1DZPnMK2%lEQ^`GWAPO?*sP)B$euT-f*Mo|7$344@=1Xf{` zrE+*A5)`I`DELab>61J=Dkyd@JPODM0r?D2xj;8AV0Pq1R**jz4OBH!<)R9fVM&45 z;PVbjN-}8@jVI_Z2W|6L=MH+1>ZmxJru>*p&!hapQkH6QwH+ByLPqwuX6TmQsE71~o>-PhgqqOV diff --git a/AI/Text2Sql/app.py b/AI/Text2Sql/app.py index 22e8cfb1..d0df49f7 100644 --- a/AI/Text2Sql/app.py +++ b/AI/Text2Sql/app.py @@ -1,84 +1,13 @@ import re -from typing import io -import pandas as pd -from fastapi import FastAPI, HTTPException, Depends -from pydantic import BaseModel -from sqlalchemy import create_engine, Column, Integer, String, SmallInteger, Date -from sqlalchemy.orm import declarative_base # 更新导入路径 -from sqlalchemy.orm import sessionmaker, Session import uvicorn # 导入 uvicorn +from fastapi import FastAPI, HTTPException, Depends +from sqlalchemy.orm import Session from starlette.responses import StreamingResponse - from Text2Sql.Util.PostgreSQLUtil import PostgreSQLUtil -from Text2Sql.Util.SaveToExcel import save_to_excel +from Text2Sql.Util.SaveToExcel import save_to_excel_stream from Text2Sql.Util.VannaUtil import VannaUtil - -# 数据库连接配置 -DATABASE_URL = "postgresql+psycopg2://postgres:DsideaL147258369@10.10.14.71:5432/szjz_db" - -# 创建数据库引擎 -engine = create_engine(DATABASE_URL) - -# 创建 SessionLocal 类 -SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine) - -# 创建 Base 类 -Base = declarative_base() - -# 定义 t_bi_question 表模型 -class TBIQuestion(Base): - __tablename__ = "t_bi_question" - - id = Column(Integer, primary_key=True, index=True) - question = Column(String(255), nullable=False) - state_id = Column(SmallInteger, nullable=False, default=0) - sql = Column(String(2048)) - bar_chart_x_columns = Column(String(255)) - bar_chart_y_columns = Column(String(255)) - pie_chart_category_columns = Column(String(255)) - pie_chart_value_column = Column(String(255)) - session_id = Column(String(255), nullable=False) - excel_file_name = Column(String(255)) - bar_chart_file_name = Column(String(255)) - pie_chart_file_name = Column(String(255)) - report_file_name = Column(String(255)) - create_time = Column(Date, nullable=False, default="CURRENT_TIMESTAMP") - is_system = Column(SmallInteger, nullable=False, default=0) - is_collect = Column(SmallInteger, nullable=False, default=0) - -# 创建 Pydantic 模型 -class TBIQuestionCreate(BaseModel): - question: str - state_id: int = 0 - sql: str = None - bar_chart_x_columns: str = None - bar_chart_y_columns: str = None - pie_chart_category_columns: str = None - pie_chart_value_column: str = None - session_id: str - excel_file_name: str = None - bar_chart_file_name: str = None - pie_chart_file_name: str = None - report_file_name: str = None - is_system: int = 0 - is_collect: int = 0 - -class TBIQuestionUpdate(BaseModel): - question: str = None - state_id: int = None - sql: str = None - bar_chart_x_columns: str = None - bar_chart_y_columns: str = None - pie_chart_category_columns: str = None - pie_chart_value_column: str = None - session_id: str = None - excel_file_name: str = None - bar_chart_file_name: str = None - pie_chart_file_name: str = None - report_file_name: str = None - is_system: int = None - is_collect: int = None +from Model.biModel import * # 初始化 FastAPI app = FastAPI() @@ -87,6 +16,7 @@ app = FastAPI() def read_root(): return {"message": "Hello, World!"} + # 获取数据库会话 def get_db(): db = SessionLocal() @@ -95,6 +25,7 @@ def get_db(): finally: db.close() + # 创建记录 @app.post("/questions/", response_model=TBIQuestionCreate) def create_question(question: TBIQuestionCreate, db: Session = Depends(get_db)): @@ -104,6 +35,7 @@ def create_question(question: TBIQuestionCreate, db: Session = Depends(get_db)): db.refresh(db_question) return db_question + # 读取记录 @app.get("/questions/{question_id}", response_model=TBIQuestionCreate) def read_question(question_id: int, db: Session = Depends(get_db)): @@ -112,6 +44,7 @@ def read_question(question_id: int, db: Session = Depends(get_db)): raise HTTPException(status_code=404, detail="Question not found") return db_question + # 更新记录 @app.put("/questions/{question_id}", response_model=TBIQuestionCreate) def update_question(question_id: int, question: TBIQuestionUpdate, db: Session = Depends(get_db)): @@ -125,6 +58,7 @@ def update_question(question_id: int, question: TBIQuestionUpdate, db: Session = db.refresh(db_question) return db_question + # 删除记录 @app.delete("/questions/{question_id}") def delete_question(question_id: int, db: Session = Depends(get_db)): @@ -135,16 +69,17 @@ def delete_question(question_id: int, db: Session = Depends(get_db)): db.commit() return {"message": "Question deleted successfully"} + # 通过语义生成Excel -@app.post("/questions/getExcel") -def getExcel(question: str): +@app.post("/questions/get_excel") +def get_excel(question: str): # 指定学段 - question = ''' - 查询: - 1、发布时间是2024年度 - 2、每个学段,每个科目,上传课程数量,按由多到少排序 - 3、字段名: 学段,科目,排名,课程数量 - ''' + # question = ''' + # 查询: + # 1、发布时间是2024年度 + # 2、每个学段,每个科目,上传课程数量,按由多到少排序 + # 3、字段名: 学段,科目,排名,课程数量 + # ''' common_prompt = ''' 返回的信息要求: 1、行政区划为NULL 或者是空字符的不参加统计 @@ -165,45 +100,10 @@ def getExcel(question: str): media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", headers={"Content-Disposition": "attachment; filename=导出信息.xlsx"} ) -def save_to_excel_stream(data): - # 将数据保存为Excel文件流 - df = pd.DataFrame(data) - output = io.BytesIO() - with pd.ExcelWriter(output, engine='openpyxl') as writer: - df.to_excel(writer, index=False) - output.seek(0) # 将指针移动到流的开头 - return output -# 添加 main 函数 -def main(): - # 开始训练 - print("开始训练...") - # 打开AreaSchoolLesson.sql文件内容 - with open("Sql/AreaSchoolLessonDDL.sql", "r", encoding="utf-8") as file: - ddl = file.read() - # 训练数据 - vn.train( - ddl=ddl - ) - # 添加有关业务术语或定义的文档 - # vn.train(documentation="Sql/AreaSchoolLesson.md") - - # 使用 SQL 进行训练 - with open('Sql/AreaSchoolLessonGenerate.sql', 'r', encoding='utf-8') as file: - sql_content = file.read() - # 使用正则表达式提取注释和 SQL 语句 - sql_pattern = r'/\*(.*?)\*/(.*?);' - sql_snippets = re.findall(sql_pattern, sql_content, re.DOTALL) - - # 打印提取的注释和 SQL 语句 - for i, (comment, sql) in enumerate(sql_snippets, 1): - vn.train(sql=comment.strip() + '\n' + sql.strip() + '\n') - - - uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True) # 确保直接运行脚本时启动 FastAPI 应用 if __name__ == "__main__": vn = VannaUtil() - main() \ No newline at end of file + uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True) diff --git a/AI/Text2Sql/vanna.db b/AI/Text2Sql/vanna.db new file mode 100644 index 0000000000000000000000000000000000000000..11abe5afacafd0849cb938336c3d167ead4e4c29 GIT binary patch literal 20480 zcmeHO-%}Gu9KRq|CvE*PeXx()yae>hK-3qtP9vN)V@PNSwM-u}$K`~yA;HTI&{=(0%x;8USr$;}8Ty@*7 zGuxiSjb0Q1iU37`B0v$K2v7tl0u%v?07ZZzKoPJa&{6l^u04Ak-7?=37gML=uuqMo z#8YW8AyhtYY7KGTFo(k4BYqB5BK9ozrL$ZuA^tex;qJq-(WTbZ5b-yiWZ zr}$3J&$WgTm{uy8h{U3-{*#ysN+hEqc`J*&&?ick-)a^;B_;GFlW|K1?UkAj%kD`g zzau#yMeJ844@lzan0O{4B-05QSIzIo2~|3wWM@{RHSA~$$YOEQB_T)d%hI~nLMGRp4{? zl!eK{k3T87apzuK)q`meqfA^rfK-XBDv8B;CN8sPK~AlU%MC={PQ4hiG!@=!a!@FE zA`<9oKf;9?8d2~o7|;zJ-cZ;V_5}mzNH;P`3x?Vt8Q!%9y8_{c-HoV?>uhbrwP@;; zQ{bIhHx=AURMCkqxDaRQPjvV*|?8-_?aO=Cs-wHhpHzo>h1D}QL|=BRfzbtauRN?nzKyTHE3+2cNc=iib&KXMvEz&ROm4p(Lz$KW88kFRi=s> zgcwsx4P{N#W2>~Y(Rt9>8f?c)E(Dwn9Q!8nu@av_Qe^*ghj2+JW(FrP_EpA>9qKJrgZ5nOrgrkRs7@ZQMjZ{!!3R;T9dD`$`|MobwVtEE3pJdC9jFOC#`xnGN*3RWiOeru>SJ5soxg=L}r z)>U0itD_tGJpVM4&*lCV!5Zfi5Dmfiv-UMMPZ63lSnB4Nel2DO?MpN73AARLR_XTT znPwZToXR=HhfB(hQPlEcdB#jg$b(Gq)&F;_GqagyGaxugOeNEjAeJ5v6><}> zwXd(kzIn&?=L~%4MG>F~Py{Ff6ak6=MSvne5ugZA1SkR&0gAx8fWS`2Cbz?KnxUSY z&1ar7^__5g*Dvb-S5GQX|Nr9(*EaS4Q~$qBPd4$g>i(=ZDNFtTw%oe}caz`A?m>|X zPZRb3S3gfv)mLH{YqMv>c3JBGZ*o!p|N7@PbpF+>Rd8d7=J)>i>WJ6H?Uw zzvkgF^DL*ICLm{&H@5$uoMjAx^zDSo<#S`JgIC~?YdK~vlU+Txq6W$3bHg|!Zeh|KNJgUK3YlePdHet>8HZ#? zNHm-lBBuqTsb}5QbEK>qvj<^%`K6VrQFGQqxHzV*Qo1=54wALXUKv1FA=rp2TRVuZ zVwaEqVX8;+WwKg(5Jb?1yshDeZq6Grg)VAgkt8V3(&mj3ng_vsMyIQ>eHHM)hGM