From 4093ba4ad5fabe1c5781e4c39ee7f71dfeae3c1f Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Sat, 8 Mar 2025 13:38:01 +0800 Subject: [PATCH] 'commit' --- AI/AiService/MarkdownToJsonConverter.py | 22 ++- AI/AiService/Start.py | 27 ++-- .../MarkdownToJsonConverter.cpython-310.pyc | Bin 5331 -> 5303 bytes AI/AiService/getExcel.py | 65 ++++++++ AI/AiService/部门项目总览.xlsx | Bin 0 -> 12317 bytes AI/Text2Sql/Sql/CreateTable.sql | 4 +- AI/Text2Sql/YunXiao_Deepseek.py | 65 ++++++++ AI/Text2Sql/YunXiao_Vanna.py | 139 ++++++++++++++++++ .../java/Tools/Crawler/Util/BookLesson.java | 3 - 9 files changed, 303 insertions(+), 22 deletions(-) create mode 100644 AI/AiService/getExcel.py create mode 100644 AI/AiService/部门项目总览.xlsx create mode 100644 AI/Text2Sql/YunXiao_Deepseek.py create mode 100644 AI/Text2Sql/YunXiao_Vanna.py diff --git a/AI/AiService/MarkdownToJsonConverter.py b/AI/AiService/MarkdownToJsonConverter.py index 8cf28b36..94f14c22 100644 --- a/AI/AiService/MarkdownToJsonConverter.py +++ b/AI/AiService/MarkdownToJsonConverter.py @@ -7,8 +7,9 @@ from CommonUtil import * class MarkdownToJsonConverter: - def __init__(self, client): + def __init__(self, client, language): self.client = client + self.language = language def extract_level1_title(self, markdown_content): """ @@ -110,36 +111,41 @@ class MarkdownToJsonConverter: return result - def generate_descriptions_for_json_batch(self, json_data): + def generate_descriptions_for_json_batch(self, json_data,language): """ 批量生成描述语句,并替换 JSON 中的 text 属性 """ for item in json_data: if "data" in item and "title" in item["data"]: title = item["data"]["title"] - description = self.generate_description(title) # 同步调用 + description = self.generate_description(title,language) # 同步调用 item["data"]["text"] = description if "data" in item and "items" in item["data"]: for sub_item in item["data"]["items"]: if "title" in sub_item: title = sub_item["title"] - description = self.generate_description(title) # 同步调用 + description = self.generate_description(title,language) # 同步调用 sub_item["text"] = description yield json.dumps(item, ensure_ascii=False) time.sleep(0.5) # 控制逐行输出的速度 - def generate_description(self, title): + def generate_description(self, title,language): """ 调用 AI 接口,生成描述语句(限制在 20 个字以内) """ + prompt='' try: + if language=='cn': + prompt='请使用中文回答。' + elif language=='en': + prompt='Please answer in English.' response = client.chat.completions.create( # 同步调用 model=MODEL_NAME, messages=[ {"role": "system", "content": "你是一个专业的助手,能够根据上下文生成简洁的描述信息。"}, {"role": "user", - "content": f"请为以下标题生成一句话的描述信息,描述信息应简洁明了,且与标题内容相关,不要使用与标题内容相同的语句,不要包含任何序号(如 1.、2. 等)或 Markdown 语法(如 #、- 等),且描述长度不超过 20 个字:\n- {title}"} + "content": f"{prompt}请为以下标题生成一句话的描述信息,描述信息应简洁明了,且与标题内容相关,不要使用与标题内容相同的语句,不要包含任何序号(如 1.、2. 等)或 Markdown 语法(如 #、- 等),且描述长度不超过 20 个字:\n- {title}"} ], max_tokens=20 ) @@ -156,7 +162,7 @@ class MarkdownToJsonConverter: print(f"调用 AI 生成描述信息时出错:{e}") return title - def convert_markdown_to_json(self, markdown_content): + def convert_markdown_to_json(self, markdown_content,language): """ 将 Markdown 内容转换为 JSON 格式 """ @@ -179,5 +185,5 @@ class MarkdownToJsonConverter: listAll.append(item) # 生成描述 - for item in self.generate_descriptions_for_json_batch(listAll): # 使用普通 for 循环 + for item in self.generate_descriptions_for_json_batch(listAll,language): # 使用普通 for 循环 yield item diff --git a/AI/AiService/Start.py b/AI/AiService/Start.py index 67f64a6c..b0b783bf 100644 --- a/AI/AiService/Start.py +++ b/AI/AiService/Start.py @@ -20,11 +20,11 @@ client = OpenAI( ) -def convertMarkdownToJson(markdown_content): +def convertMarkdownToJson(markdown_content: str, language: str): # 创建转换器实例 - converter = MarkdownToJsonConverter(client) + converter = MarkdownToJsonConverter(client, language) # 转换 Markdown 为 JSON - for item in converter.convert_markdown_to_json(markdown_content): # 使用普通 for 循环 + for item in converter.convert_markdown_to_json(markdown_content,language): # 使用普通 for 循环 yield f"{item}\n" # 每行数据后添加换行符 # 添加结束标记 yield '{"type": "end" }\n' # 结束标记后也添加换行符 @@ -47,17 +47,22 @@ def get_local_ips(): # 流式生成数据的函数 -async def generate_stream_markdown(course_name: str): +async def generate_stream_markdown(course_name: str, language: str): """ 流式生成 Markdown 数据,并在控制台输出完整的 Markdown 内容 """ + prompt = '' + if language == 'cn': + prompt = '请使用中文回答' + elif language == 'en': + prompt = 'Please answer in English。' # 调用阿里云 API,启用流式响应 stream = client.chat.completions.create( model=MODEL_NAME, messages=[ {'role': 'system', 'content': '你是一个教学经验丰富的基础教育教师'}, {'role': 'user', - 'content': '帮我设计一下' + course_name + '的课件提纲,用markdown格式返回。强调1、标签只能返回 #,##,###,-,其它标签一率不可以返回,这个非常重要!2、不要返回 ```markdown 或者 ``` 这样的内容! 3、每部分都有生成完整的一、二、三级内容,不能省略。'} + 'content': prompt + ',帮我设计一下' + course_name + '的课件提纲,用markdown格式返回。强调1、标签只能返回 #,##,###,-,其它标签一率不可以返回,这个非常重要!2、不要返回 ```markdown 或者 ``` 这样的内容! 3、每部分都有生成完整的一、二、三级内容,不能省略。'} ], stream=True, # 启用流式响应 timeout=6000, @@ -100,11 +105,12 @@ def root(): @app.post("/api/tools/aippt_outline") # 仅支持 POST 方法 async def aippt_outline( - course_name: str = Body(..., embed=True, description="课程名称") # 从请求体中获取 course_name + course_name: str = Body(..., embed=True, description="课程名称"), # 从请求体中获取 course_name + language: str = Body(..., embed=True, description="语言"), ): # 返回流式响应 return StreamingResponse( - generate_stream_markdown(course_name), + generate_stream_markdown(course_name, language), media_type="text/event-stream", headers={ "Cache-Control": "no-cache", @@ -116,9 +122,10 @@ async def aippt_outline( @app.post("/api/tools/aippt") # 修改为 POST 方法 -def aippt(content: str = Body(..., embed=True, description="Markdown 内容")): # 使用 Body 接收请求体参数 +def aippt(content: str = Body(..., embed=True, description="Markdown 内容"), + language: str = Body(..., embed=True, description="语言")): # 使用 Body 接收请求体参数 return StreamingResponse( - convertMarkdownToJson(content), # 传入 content + convertMarkdownToJson(content, language), # 传入 content media_type="text/plain", # 使用 text/plain 格式 headers={ "Cache-Control": "no-cache", @@ -144,4 +151,4 @@ if __name__ == "__main__": print(f"http://{ip}:5173") # 启动 FastAPI 应用,绑定到所有 IP 地址 - uvicorn.run(app, host="0.0.0.0", port=5173) \ No newline at end of file + uvicorn.run(app, host="0.0.0.0", port=5173) diff --git a/AI/AiService/__pycache__/MarkdownToJsonConverter.cpython-310.pyc b/AI/AiService/__pycache__/MarkdownToJsonConverter.cpython-310.pyc index 744585530d957c9561ce12a0a349c9642b54b2f7..5fae9c805620cb94983f042c3a4738058fa78797 100644 GIT binary patch delta 620 zcmYjPJ#W)c6n)qBvs2e@o7fSJ01-uWuo`Ge#ixL(A_%536jjI)R3SDfA!(Xt6RDLW zsKg3Ix&tf~%>t|_13v&JRu<$9iG`UTQ0_}J)RvEq&O7(Kd*Aa<@2h8bY};h?Jh}Sg z>9;TTo(}$SUsnN&!-1hGFpnQ6-a#2}nkD-T`Fy<<_QKW$AKPXLe0*zue^Dnr`b@CP z5~#9=t9>PaMCmb643va;fk}SE0;RzEYQhd!q7I<1CE8e5ud{JgH5hADRUNNt1xCqE zpHm_y{Z>NkD5-KRiU$%rs#^(zVz%D8Qn z_^QWK7k_8n^QqR1I-4mEp0~H6RErz!Fy&Dz47WU8aH>crq+uhd#Wk^rf2=TXf(5QJ z0~vZv`jnS5U=1JHe*u<;cc=e>z9e^=! zvdb`^+nJ)_dEZD3IZbyNKjm&aKCgFrp%8au^}K|0c$k};^d!9~VHp>k*|WYiy0XQM zxDmHPKQn5+^SCM2q(@#VP4Ubr!v}ot6f;w52DHPS3a)6g_{+JMKb9xGvQKdz_uUh? i=AJG&)=8m~6h3F}D~b2gysU{2jHwnPsb~_aXnoZRZ4j4@7No?^RT9$1%)JB}Zq(pL z7ovC;y0NqycP=Cg#f=+RBDipOSN@0o0cT<=Zk&OSZ_eX8=giE<#ydxDWvsS9z%K=`}*h_1Moge z=YR#c0C#GA#xY=B5L285l4n8jKa8_1^tAxCA<%Yk3AnZheLc{}UVRS6xm9lb&8?2T zMi!+0WZ#gwhSXdNB%A+_puweBr|e@>HDPk=Ia>w;jVUl%3%VY>(8Xk(hiF(S7@ z_Bf77)OUMxbXCvL_e6Twb3P$fhUJM}G%KpsG!+xGhp#9-bcLx*Bd`=ZFX0J2On4LO zIJ)9Dk__0ZWG_>KUbGC{T7(+(t-w-qbuP~WJxyLug~nRD>k3g-GDQWa=vVT@zLScc zQ!r2QROWbSkDl_(s^9Qi?n-1dQu}UG)KpAPL5Xgpinv9ashlyXMbMz&mylKf diff --git a/AI/AiService/getExcel.py b/AI/AiService/getExcel.py new file mode 100644 index 00000000..d43494a2 --- /dev/null +++ b/AI/AiService/getExcel.py @@ -0,0 +1,65 @@ +import pandas as pd +from openpyxl import load_workbook +from openpyxl.styles import PatternFill +from openpyxl.formatting.rule import ColorScaleRule + +# ================= 创建测试数据 ================= +project_data = { + "项目编号": ["P001", "P002"], + "项目名称": ["CRM系统升级", "电商平台开发"], + "项目类型": ["升级", "新开发"], + "起止时间": ["2025-03-01 ~ 2025-06-30", "2025-04-01 ~ 2025-12-31"], + "当前阶段": ["开发中", "需求确认"], + "交付状态": ["未交付", "部分交付"] +} + +progress_data = { + "项目编号": ["P001", "P002"], + "整体进度(%)": [45, 20], + "剩余人天": [120, 300], + "当前人数": [8, 5], + "资源缺口": [2, 3] +} + +maintenance_data = { + "产品名称": ["CRM系统", "订单管理模块"], + "维护类型": ["付费维护", "质保期维护"], + "工单量": [15, 8], + "平均耗时(h)": [3.5, 2.0] +} + +product_data = { + "产品名称": ["CRM标准版", "ERP基础版"], + "定价(万元)": [12.8, 28.5], + "近半年销量": [35, 18] +} + +# ================= 生成Excel文件 ================= +with pd.ExcelWriter("部门项目总览.xlsx", engine="openpyxl") as writer: + # 生成四个工作表 + pd.DataFrame(project_data).to_excel(writer, sheet_name="项目概况", index=False) + pd.DataFrame(progress_data).to_excel(writer, sheet_name="进度管理", index=False) + pd.DataFrame(maintenance_data).to_excel(writer, sheet_name="维护工作", index=False) + pd.DataFrame(product_data).to_excel(writer, sheet_name="产品信息", index=False) + + # 获取工作簿对象进行格式设置 + workbook = writer.book + + # 设置公共格式 + for sheetname in writer.sheets: + sheet = writer.sheets[sheetname] + # 冻结首行 + sheet.freeze_panes = "A2" + # 设置列宽 + sheet.column_dimensions['A'].width = 15 + sheet.column_dimensions['B'].width = 25 + + # 为进度表添加条件格式 + progress_sheet = writer.sheets["进度管理"] + progress_rule = ColorScaleRule(start_type='num', start_value=0, start_color='FF0000', + mid_type='num', mid_value=50, mid_color='FFFF00', + end_type='num', end_value=100, end_color='00FF00') + progress_sheet.conditional_formatting.add("B2:B100", progress_rule) + +# ================= 最终文件生成 ================= +print("文件已生成:部门项目总览.xlsx") diff --git a/AI/AiService/部门项目总览.xlsx b/AI/AiService/部门项目总览.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..246fb7ad0c6413206b99b65fb559d84ac344b734 GIT binary patch literal 12317 zcmeHNb97z#+CH&u+ia}Hwr$%D+SqLDG>sbDwi_Ew8a7sAf2W<9Z)SSu{(slHcdxbg zKG^TG&OZBne$OjK88C2k03-k!0058xq+O;L?LYtkYDfS84FCH~cQ)T+=ICR+J@!?*vmBKm15Z%0vqz^Dz8A+ z4qZNZbs)$jt_IW2O7*2t3uer5|Hi<*REM88bml;0fRPr%&=w@y;7Fk%vT}ZN(MAgSn>I+hRbRIe0Y9|#wvMO#M zn1pY;&*R9-Dqr-$5Xto>TUjJ3Ixks+dwEF8z0(^gT1w|+ai_A4J`}gPtGVkm329Gi zxAqv?(x##unc=Tw;xp%>)yNZ!8n`fMpYTI4U#IzN56Ee)8Q)fb%n7R;mxoj}a%LYS zOl0^jBp2)>2!wIV9!;lV47nIvELC_7*^*q{;i{=va$8jyXF2nbdK#G8c3p^ObfP_Z zG00~QDU-6H-m*+c3{qrY`D)j(oqzt|*~bafTRL<$91uxVv<4Kced3z9u(;vRaZEtDvy(uQHh|cPY zMHuPG1%sk1(yN6>^fP}r=hj~=pfE#mfu#)!h2gP@bM()mR8H&2)Rd_sKrrnUN*~jP) z`s2$mBM_FD4=9>V=W%Py)2-{(7fOQze4ve+thnK+dNOQnVC~4jt{rJE@sSEqly!JroqQ{0s`OAcRcOdwLN6vI{VUX`_{$dcbNhyRC$rL=Xd+JPv^RbUo2|zpWMc8 z7a9-UKZ*4`r2`f4rVBBc#;rivao3%BuTHn8Kwh zZz*xOF|wtL#%zP~nUj~d2c%G$T@Idwl28nI>(R6QLrC;D$8g!zIB42rbnI^@V$vfB z4e6u{IS5J9cj_+C-W0qaKDq|8L~li3`mz(_Sw)ECbP+{hgqeJW*nO$j)0*c#(etsN z`i=m1xAPbdPYR`UOlJy_fp!Pd^GW+SDSIW(8$>;vg>c`##7PQEl)^igacWVs?*;+ z*6V!JhHHb5tTVR4pGf;wlY=E}Ph^x@5)Y~_e#Fl93|gJtH2A2;pu;&?=K)^Wyn4J6 z&fFr_Cr-OR0U{ladAHK=D%k;j-dnmrH#VW4v!4mCC+NIYQXAGLv)C)h)NS~#CG6)ln2eYz`&QeI6f>chz9&|p3|BcXyO8Ei z*Jf&pEJqEDYDJ7om-tH7k>tqij~`LrpA;zYaN9>My+L!prW6kVgUCgL+^|GEgO$c` zWfu{9L6kirjg9VGc@{K)0`aZS4~jTV@?}3rq4$nXF~js0Ru_04!|(L_GT)JHg-;R0);^=9D+4Vdex?pFyg$g=0W0W># z#rEXzu*y$ia}uhmjs~-;vfd=clHS!hq!(f${YP}hLwk7WkE28Z&U$dkPvA*;*+r?q zT{9cF=VlkJTN=yxMUh?PO242c9Ue&ZPYJ7n1aRacBG;j^2o8MkIKH72ocxjwz=67X zPe)j}h4zC2Kkt+;N(Qz^5E-J9wd$&MUMJpj3Sc}KZy2@R!uu!o6*2S>lmpqvg!pgS$MTbX@t^Hin32V9Aig7k z-(ZSyOW}1p$5JAs>nUAU7Zk^drEV9I??6R7zmsr2!af^-CW*_-aW${-u(;8&Si%)% z+oNNw&15~pwb@F+c)8p1{2pGLnpl}6hJ^7|X#3`(6sZ;=M9mfil`J3z!$IXhnk3i0 zyExSdD$5NQUY_DDybMY!poCDsSsmTehc1&P>lDVUDac>WgpDufgy6O=6ugR7>7|-o zk8d)w1AJFi^J-Nz?-070c|Ly?dS`V1yV?G$qF{~1!TwR>1=N-x0V{vM2-36yXtAs|qr?6%Y2N5r0 z27^KB+_HD%DZQ>N&*yh#9Iicqw3yk&w}JGXLo`?yOP1LNkR0da=7B_5VWmb}!3Wzx zVTM9T!Q2}}n;n8`E zUDS{eF;?{z(mP$u;krIQ;1Cl)OC|D-Ie0vn5s71?`kyOh`oGs7z8IHvcDsjV9K|heV*FMRd4F;RqYc| znX=HY*iB@>8W!k8$L|Tvgm?SG)rc3QL7zZI;s?o%9S9BoPSbxLL+_kM`(7ack(+cW z`0f?a9n)~nSH9vLl=k;@g}$Xc4HyiBHJ-2X^3GNXIs%p(P?GqzwwP(0rA>))S-0?- zV%t>a3)#WL%?Aot`FnkyfsgQa>8MN8LF^$$7j2xb@-sHz$6Xfv9J*o|TSU|#eqeL6 zRyyB<;4{s{0-wN*rEG+#jZ{OhtgURb4>?(#$}#mdb79;2MfR$SZf(8G7Z!oAf|2hgl@yYF1FM{q6Ho_UxX$|KNq_H3~HTMzcNErr(YthflBt{2-doL(S~X~eG@ z{~b)|xm|?Y4c@~fD3;oqi^pBOE#aYj$`pa0+JY|KA``W0dTxY~?*3+?cP}{}Ik-I7 z4U@JdQ1GRG-@UP>?UZZG&wagi0gvDKJgEWNe}|mo$G!pIGTMahDw;+i!b`WWK|=s8Zht z2q^V0NRL#;lx?DMJd+-MgO=(1oYG!GcRwI70R7Q3{WdIS#+7Al-&L88Z(|nYd{Xt& zLVw(zCPN4=v7}Fc1VyYI!g)rsNiA5c;(c_ z3nYSWE`XOdvjWK)DKO{bZyaMSG*Ba0iNF-Q2Gb%sw`jbBuwD}XvaaQ`OfY!=gJPlZ z@bKcahpYGjq829oT3I7^KU3Y`!q7w^`yODpQ;B*9aKcYOC81J&1y_NBQ|hWz-J`9)kM5%8lQ^yAhOO9 z2DTPBe0;LOf+Drxjp|$NjbYCLxO}rkYY_0>gR0QV5b&r)yB!rD_;2Jx1wP+*_N#x{ zG|(KK$<67ig=nJ@fs;L+4XCTC={N0;C}7l?@_TQ0kxtN%f?Fwum{Qqk6pFy?#0*WScU~x_b7{&sF=pSjKtEHKp*>A_68RC)Fm^~gR zatGEPFY-GD`lWbdoH#I<+_gDnB3^!Yk6OJJ*Z;ypWCP`4@zn5XCGHHN{*2GG% zMe1~a@Hw4+>zCi76@)V(NtcR@qr2ku(q8HzLP1UeeQn_k))*jK>}6sm7E%?pW0sWw zzlP8(SsfsOQYi{b1hSe~UZzePorFF^FNh=We{{SN8v=gCK5XbB-S&C?m5MS`kSxCC zbl;Lu)l^XliZdy%Y_`a#Hpp4{tRIe!bNanJo?b;V3*HIlH%=KZ1Zml2e--u5D9I*om)RS4pv>n=)v!?0&j~2w1 zKS;MLyPQ+p+rY7Kt{l6yYYiEPvUCW;MSL`K{*Iw8W?mfEbDxb@9oBm?dQZi zYo{9%AM3|c?6Ks~7PJYyehYDhV>-0-9;*ZK>`EHsV;(WC`P3&Zc+Z05e(i`;mnkqm zNR~3okXdpn5qx2EcSjy%bQ?&I!1ZaoR`iiziZQrh#Yx@z03;W9oL7Z62X{A>Sps6f zo!jU3=ypS4Fouvo;Bji&nd4-gp3%YU?%2ZD=jqk`);=c_k?+RfXO`y$BL2Ib-H9i` zq3mW+rVg*~E45Ti9ru?vr-bj;bwH6u&iLiRMH`1umUn6P%;C4B@D>v;BE%B zjw~1l%)amsgz~QuWO7#=zjrB0pUHN${VEkk_KmPNt>@KnnD9LT5z6M&3E`*rdhS-# zP1_>DCrYyevo`Vk1YG`XU+Y9v&OY}QvNkPOkDchvej6>k;^20Ti$zZL3_3;omU_-Z zNSAppdF?bZ0@)d^wu$!LkhF)Tf)2xjhcYV9UNrymxS2ulYy6CZVUdP@<7ti50cmh9 z8PQlq>II8)Mhp*(yi4UMo6N`}(l)d(dfw{_uQR<9^DrMsfpy#B)>GxpriV*m$IyZI zk{5s}hk@LpW%(yLtLqlyQEJsK_kNGJJa*kv+cM!F&?r2B$)%=8<2ly~8S-Hy)I@8` zL&XW?%Uw$=eaeKk-7W-X`nIQmZOgW+RRUjr6hRRT8Rw_{1PXjiB_vEaFo%MsrO8C9 zvwG?iL`KZTBcXuOm?CLJ)J%~WJ$5n!dbsIoQSKDxl{7O}uXwb&F)pc`F^&Wyy$SdR zLdh~WPt%lzk=l)@s5EpFR$qCXaw|{^vj(@F<5abBRhhAGCF<a^6cH(wd_PgLhp>Sqrn!%$f0ARn{XHT>lWzBTq0Cth%B}$-y>SN(!!N z9^y#$VofmmRta`d(ie#CFwg&6sK5G3Cpel zW1_%^H$?&bC`pdIMzjGwex+=7b{8WI<6_fEOjYpX6(-44P1KD%Vufk88#jm%U~p5wqc`OI~qV4ReN$^cVH0SfigDB z9{XAPA8L9a2tnKof9oU;z3&|3mg&uBm-V*(I3TIW6{AWDy%bnuntO_FJhypNL#`z> zB&sT!8l71-&ZD!CUYJZG09>a)+SpGGC@5L5UiU;Hm(B4U?_s4o4N zcOMjWP#)(ESLnc%yoWgmC9x!R2%#pqmc;O6(-ZA9BTlY+R;Zie7sFJMZ>(yyVb5k4cNa zwtfN8J?~C5@+J4%ZX9RV;d}wMPye-gIV?0uRtAg!#bE&epk)4#B^OsuTQip*VPC%b z*!vZZ-vp`6=E_7W4qi&AO&<(IIlIf=Pv0uMCR{OJa7u?;%%Oc}F?6qdSNEO+A$D+2 zQ6RaYRnKBIQVubcs58xh%lLasIafIL_ieMOJ|GVMiPFe=P-vsCv2<9zY9Bofr^YgrXQwAfwhW7WLeMxFSa~A-3Ney1vA*vx~Dq%Rj{QRxstV-8iRuQChu|?e(QFjz_wHCL8 z-IUlR__m8H?>xe!dFF)kB3=!6gq#WmM^!|~`D%5DDNGaAYduY#!*-SJy|>+s4wmE= z!6H{? z@^oto5N0lC)CA2ai)>=79FKJ-vp?E@ns?Nx$u~5mg4hs$a2oWYaz(bpq?v!$bYxN< z7nm$kyaeeZC~&yqv`3x!1|ps8b8Uw=v%(+?I0VfI&2A%Q>c=&e7aJZuCxe^D?nPfz=jTI!^&v9lRi;6uUXbBI{EX(xsgU0Uba+j%C9-ZAy({o}X zx3y~7d4`j}L_AaAN2RCW-^-2|WQE0vYG^1*)jE4CmKD~B6vP$We%_rhc)7Kse7^4D za!sLv#bAzfqxBv%)ybH!bElCn>_VWnltR?~`T-r?_)x-)=?zrerq$!P zD;_7C#iV+tO}1E<01tKYrh&bB<>uAcKrN{ShSqZcq&j}ty!yI9<@}-Ck;nK|Wo^v! z%k8$i13ESJf&m9UA*4058}zwb%_k3z{eAZ<=(^o6CQPnt;1woznD1@wNW#mgb;0K; z9oygRRoVy0F>ev--m*E`E*ul#gm_iWmgQ!KE0rqcG-HSc?6V3j=Am9+0!E+zu^qf+ zv1LjMB)lcCzlHugtu2k5%}iBXovrLGezMy&QORLi5qv$_$!zH5V2qWe&H7fko*tJH&P3AVlY&yEO(~SZW*orN*0xXA zHcYj0+FEn*3uhwYefyZ5Ss1oazo#c+iN>_W!ZrtO62P@aWimmBCuU-TQ>2*X(zkYR zqvFIB#49VQZn};hDx`^#QY6PMzQO&i(I-KuJt7pV z8{R3dNWVji9qd`zg@kPS-|XTffe(mFoTt4&xlHT z+pFvnBNxd!Sk*k+OQF>0;3ep*WNiYDo>$3Dd>T1!mIfamKn@;t@8BJ=uWW8D;m1%% zE3(}{d}&d4-aCNM*&K3uiRW%$pF`S9h^3r58n|DAROdaR;3o#G<`x(+;f{X?A0^6p zcN2d8+2VRL!7iPd3r;_0P%$AanAIm{lqV}g@j<-89Xh&woy+L6V3Lqyrr|w7iJV3$ zig=s*vGx1q@41-LXsnNsP)2>$yaTp#(-Zhlvo;}D{1qvaMM$fd=fSUPQuJo`A+P{X zwRUnM*Y%J@*Dwm>-;ql&QgKe=aGB#QaJ=UP={COmcb;y2u-i*(s(c>AK2_-cXQ{~M z1gDY$dYd`WuTX$JQ&R^MMP~;`7bX)2XR{x20i4G9U#k>Iwy1amgGcYBV1_m<(1A^AXn3kJmNrsRXpXPNzfyWkIrlU8#R;SRy`*{Ge)i* z%+yM|r7O;1Ol0_VpS=2~-|GdWck-lWoE%UN=wpir@WA-hZ6~ zTgn#Sn)=~?z;Ffmul{G`==eYP1NzITxl12sfoX!5QoFI)*?CDb>bz z;&==%tGliR(PeDl%m^&F8=PY;ZP#a^IvWsB@nNALU6}QgpfCVBa2|G1hy?3we2IA( zRbVpTB`T!qJV=CFYh9VypSycB=6ms~QD*LZ=6<-W$9?n3(D};ye#gU^H;cf4UrV%E zTVPs1q)q9*ePyD>T!WbJi)Hf=kGB!YwT0dnMuBHurt{OnZyit=k2{!bJT ztbauLS(N`3<<|=1pD4DtKT&?IH~tFnYcBXF02s+nfIl BbAbQ= literal 0 HcmV?d00001 diff --git a/AI/Text2Sql/Sql/CreateTable.sql b/AI/Text2Sql/Sql/CreateTable.sql index 80308afe..cfe0c90f 100644 --- a/AI/Text2Sql/Sql/CreateTable.sql +++ b/AI/Text2Sql/Sql/CreateTable.sql @@ -168,4 +168,6 @@ COMMENT ON COLUMN "public"."t_crawler_structure_knowledge"."subject_name" IS ' 用得到stage_id再与其它表进行关联查询,不要直接使用学段名称进行查询。 2、如果用户需要检索科目,比如语文,数学,需要先到 select subject_id from t_crawler_subject where subject_name='语文' - 用得到subject_id再与其它表进行关联查询,不要直接使用科目名称进行查询。 \ No newline at end of file + 用得到subject_id再与其它表进行关联查询,不要直接使用科目名称进行查询。 +3、凡是涉及到行政区划,也就是gather_regionc字段的,不能直接从t_crawler_lesson表中读取,它没有这个列, +需要通过lesson_id 关联到t_crawler_lesson_school表中,再从t_crawler_lesson_school表中读取。 \ No newline at end of file diff --git a/AI/Text2Sql/YunXiao_Deepseek.py b/AI/Text2Sql/YunXiao_Deepseek.py new file mode 100644 index 00000000..57225e89 --- /dev/null +++ b/AI/Text2Sql/YunXiao_Deepseek.py @@ -0,0 +1,65 @@ +from openai import OpenAI + +# 阿里云中用来调用 deepseek v3 的密钥 +MODEL_API_KEY = "sk-01d13a39e09844038322108ecdbd1bbc" +#MODEL_NAME = "qwen-plus" +MODEL_NAME="deepseek-v3" + +# 初始化 OpenAI 客户端 +client = OpenAI( + api_key=MODEL_API_KEY, + base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", +) + +def generate_sql_from_prompt(ddl: str, prompt: str) -> str: + """ + 根据 DDL 和自然语言描述生成 SQL 查询 + :param ddl: 数据库表结构的 DDL + :param prompt: 自然语言描述 + :return: 生成的 SQL 查询 + """ + # 构建完整的提示词 + full_prompt = ( + f"以下是数据库表结构的 DDL:\n\n{ddl}\n\n" + f"请根据以下描述生成 SQL 查询:\n\n{prompt}\n\n" + "生成的 SQL 查询:" + ) + + # 调用大模型 + response = client.chat.completions.create( + model=MODEL_NAME, + messages=[ + {"role": "system", "content": "你是一个专业的 SQL 生成助手,能够根据数据库表结构和自然语言描述生成正确的 SQL 查询。"}, + {"role": "user", "content": full_prompt} + ], + max_tokens=500 + ) + + # 提取生成的 SQL + if response.choices and response.choices[0].message.content: + return response.choices[0].message.content.strip() + else: + raise ValueError("未能生成 SQL 查询") + +if __name__ == '__main__': + # 读取 Sql/CreateTable.sql 文件 + with open("Sql/CreateTable.sql", "r", encoding="utf-8") as file: + ddl = file.read() + + # 自然语言描述 + prompt = "查询 2024 年每个学段下,上传课程数量排名前 10 的学校,并按行政区名称和上传课程数量排序。" + + common_prompt=''' + 要求: + 1、对于学校名称和行政区划名称为空的不要进行统计。 + 2、有行政区划列返回时,先按行政区划排序 + 3、有课程数量时,再按课程数量由高到低排序 + ''' + prompt = prompt + common_prompt + # 生成 SQL + try: + sql = generate_sql_from_prompt(ddl, prompt) + print("生成的 SQL 查询:") + print(sql) + except Exception as e: + print(f"生成 SQL 时出错:{e}") \ No newline at end of file diff --git a/AI/Text2Sql/YunXiao_Vanna.py b/AI/Text2Sql/YunXiao_Vanna.py new file mode 100644 index 00000000..ba7b81a4 --- /dev/null +++ b/AI/Text2Sql/YunXiao_Vanna.py @@ -0,0 +1,139 @@ +import os +import platform +from Text2Sql.Util.Text2SqlUtil import * +from Text2Sql.Util.PostgreSQLUtil import PostgreSQLUtil +from Text2Sql.Util.SaveToExcel import save_to_excel +from pyecharts.charts import Bar +from pyecharts import options as opts +from pyecharts.commons.utils import JsCode +import pandas as pd +import webbrowser + + +def process_data(raw_data): + """ + 处理原始数据: + 1. 过滤无效行政区 + 2. 按上传数量降序排序 + 3. 取前10名 + """ + df = pd.DataFrame(raw_data) + df = df[df['行政区划名称'].notna() & (df['行政区划名称'] != '')] + df = df.sort_values('上传课程数量', ascending=False).head(10) + return df + + +def create_top10_chart(data): + # 准备数据 + schools = data['学校名称'].tolist() + counts = data['上传课程数量'].tolist() + districts = data['行政区划名称'].tolist() + + # 生成颜色渐变(从深到浅) + colors = [f"rgb({75 + i * 15}, {115 - i * 10}, {220 - i * 20})" for i in range(10)] + + # 创建图表 + bar = Bar(init_opts=opts.InitOpts(width='1200px', height='600px')) + bar.add_xaxis(schools) + bar.add_yaxis( + series_name="上传数量", + y_axis=counts, + itemstyle_opts=opts.ItemStyleOpts(color=JsCode( + "function(params){" + f" return {colors}[params.dataIndex];" + "}" + )), + label_opts=opts.LabelOpts( + position="right", + formatter=JsCode( + "function(params){" + " return params.value + ' (' + params.name.split('').join('\\n') + ')';" + "}" + ) + ) + ) + + # 全局配置 + bar.set_global_opts( + title_opts=opts.TitleOpts( + title="上传资源数量TOP10学校排名", + subtitle="数据来源:课程资源管理系统" + ), + tooltip_opts=opts.TooltipOpts( + formatter=JsCode( + """function(params){ + return params.name + '
' + + '所属行政区:' + %s[params.dataIndex] + '
' + + '上传数量:' + params.value + '' + }""" % districts + ) + ), + xaxis_opts=opts.AxisOpts( + axislabel_opts=opts.LabelOpts( + rotate=30, + formatter=JsCode( + "function(value){" + " return value.length > 6 ? value.substring(0,6)+'...' : value;" + "}" + ) + ) + ), + yaxis_opts=opts.AxisOpts(name="上传数量(件)"), + datazoom_opts=[opts.DataZoomOpts(type_="inside")], + visualmap_opts=opts.VisualMapOpts( + min_=min(counts), + max_=max(counts), + orient="horizontal", + pos_left="center", + range_color=["#91CC75", "#5470C6"] + ) + ) + + # 反转Y轴使降序排列 + bar.reversal_axis() + + return bar + + +if __name__ == "__main__": + vn = DeepSeekVanna() + + # 开始训练 + print("开始训练...") + # 打开CreateTable.sql文件内容 + with open("Sql/CreateTable.sql", "r", encoding="utf-8") as file: + ddl = file.read() + # 训练数据 + vn.train( + ddl=ddl + ) + + # 自然语言提问 + # ''' + question1 = ''' + 查询发布时间是2024年度,每个行政区划每个学校都上传了多少课程数量, + 返回:行政区名称,学校名称,上传课程数量等属性. + ''' + question = ''' + 查询发布时间是2024年度,按学段分组,比如小学、初中、高中,每个学段中上传课程数量前10名的都是些学校, + 注意:排名是指分组内部排名,不是整体排名 + 返回: 学段,排名,行政区名称,学校名称,上传课程数量等属性. + ''' + common_prompt = ''' + 要求: + 1、行政区划为NULL 或者是空字符的不参加统计工作, + 2、有行政区划列返回时,先按行政区划排序 + 3、有课程数量时,再按课程数量由高到低排序''' + question = question + common_prompt + # 开始查询 + print("开始查询...") + # 获取完整 SQL + sql = vn.generate_sql(question) + print("生成的查询 SQL:\n", sql) + + # 执行SQL查询 + with PostgreSQLUtil() as db: + sample_data = db.execute_query(sql) + filename = "d:/导出信息.xlsx" + save_to_excel(sample_data, filename) + os.startfile(filename) diff --git a/src/main/java/Tools/Crawler/Util/BookLesson.java b/src/main/java/Tools/Crawler/Util/BookLesson.java index d78899e5..8e22e035 100644 --- a/src/main/java/Tools/Crawler/Util/BookLesson.java +++ b/src/main/java/Tools/Crawler/Util/BookLesson.java @@ -482,9 +482,6 @@ public class BookLesson { for (Record record : lessonList) { String original_school_name = record.getStr("teacher_school_name");//原始学校名称 - if (original_school_name.equals("东北师范大学东安实验学校")) { - System.out.println("Here!"); - } String teacher_name = record.getStr("teacher_name"); String organization_name = ""; String organization_no = "";