From 570322904b5fa8e6a2e8fbffc97c95daf3a7fcbc Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Tue, 25 Mar 2025 08:41:29 +0800 Subject: [PATCH] 'commit' --- AI/WxMini/Start.py | 96 +++--------------- AI/WxMini/Utils/EmbeddingUtil.py | 26 +++++ AI/WxMini/Utils/MySQLUtil.py | 36 +++++++ .../__pycache__/EmbeddingUtil.cpython-310.pyc | Bin 0 -> 1265 bytes .../__pycache__/MySQLUtil.cpython-310.pyc | Bin 0 -> 1297 bytes AI/WxMini/__pycache__/Start.cpython-310.pyc | Bin 7021 -> 5037 bytes 6 files changed, 76 insertions(+), 82 deletions(-) create mode 100644 AI/WxMini/Utils/EmbeddingUtil.py create mode 100644 AI/WxMini/Utils/MySQLUtil.py create mode 100644 AI/WxMini/Utils/__pycache__/EmbeddingUtil.cpython-310.pyc create mode 100644 AI/WxMini/Utils/__pycache__/MySQLUtil.cpython-310.pyc diff --git a/AI/WxMini/Start.py b/AI/WxMini/Start.py index 2c67c501..610bbdee 100644 --- a/AI/WxMini/Start.py +++ b/AI/WxMini/Start.py @@ -1,72 +1,24 @@ -import os -import uuid +import asyncio +import logging import time -import jieba -from fastapi import FastAPI, Form, HTTPException -from openai import AsyncOpenAI # 使用异步客户端 -from gensim.models import KeyedVectors +import uuid from contextlib import asynccontextmanager -from WxMini.Utils.OssUtil import upload_mp3_to_oss, upload_mp3_to_oss_from_memory -from WxMini.Utils.TtsUtil import TTS + +from fastapi import FastAPI, Form, HTTPException +from openai import AsyncOpenAI + +from WxMini.Milvus.Config.MulvusConfig import * from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager from WxMini.Milvus.Utils.MilvusConnectionPool import * -from WxMini.Milvus.Config.MulvusConfig import * -import asyncio # 引入异步支持 -import logging # 增加日志记录 - -import jieba.analyse +from WxMini.Utils.OssUtil import upload_mp3_to_oss_from_memory +from WxMini.Utils.TtsUtil import TTS +from WxMini.Utils.MySQLUtil import init_mysql_pool, save_chat_to_mysql +from WxMini.Utils.EmbeddingUtil import text_to_embedding # 配置日志 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") logger = logging.getLogger(__name__) -from aiomysql import create_pool - -# MySQL 配置 -MYSQL_CONFIG = { - "host": MYSQL_HOST, - "port": MYSQL_PORT, - "user": MYSQL_USER, - "password": MYSQL_PASSWORD, - "db": MYSQL_DB_NAME, - "minsize": 1, - "maxsize": 20, -} - - -# 保存聊天记录到 MySQL -async def save_chat_to_mysql(mysql_pool, session_id, prompt, result): - async with mysql_pool.acquire() as conn: - async with conn.cursor() as cur: - await cur.execute( - "INSERT INTO t_chat_log (session_id, user_input, model_response, create_time) VALUES (%s, %s, %s, NOW())", - (session_id, prompt, result) - ) - await conn.commit() - - -# 初始化 MySQL 连接池 -async def init_mysql_pool(): - return await create_pool(**MYSQL_CONFIG) - - -# 提取用户输入的关键词 -def extract_keywords(text, topK=3): - """ - 提取用户输入的关键词 - :param text: 用户输入的文本 - :param topK: 返回的关键词数量 - :return: 关键词列表 - """ - keywords = jieba.analyse.extract_tags(text, topK=topK) - return keywords - - -# 初始化 Word2Vec 模型 -model_path = MS_MODEL_PATH -model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=MS_MODEL_LIMIT) -logger.info(f"模型加载成功,词向量维度: {model.vector_size}") - # 初始化 Milvus 连接池 milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=MS_MAX_CONNECTIONS) @@ -74,22 +26,6 @@ milvus_pool = MilvusConnectionPool(host=MS_HOST, port=MS_PORT, max_connections=M collection_name = MS_COLLECTION_NAME collection_manager = MilvusCollectionManager(collection_name) - -# 将文本转换为嵌入向量 -def text_to_embedding(text): - words = jieba.lcut(text) # 使用 jieba 分词 - logger.info(f"文本: {text}, 分词结果: {words}") - embeddings = [model[word] for word in words if word in model] - logger.info(f"有效词向量数量: {len(embeddings)}") - if embeddings: - avg_embedding = sum(embeddings) / len(embeddings) - logger.info(f"生成的平均向量: {avg_embedding[:5]}...") # 打印前 5 维 - return avg_embedding - else: - logger.warning("未找到有效词,返回零向量") - return [0.0] * model.vector_size - - # 使用 Lifespan Events 处理应用启动和关闭逻辑 @asynccontextmanager async def lifespan(app: FastAPI): @@ -106,7 +42,6 @@ async def lifespan(app: FastAPI): await app.state.mysql_pool.wait_closed() logger.info("Milvus 和 MySQL 连接池已关闭。") - # 初始化 FastAPI 应用 app = FastAPI(lifespan=lifespan) @@ -116,7 +51,6 @@ client = AsyncOpenAI( base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", ) - @app.post("/reply") async def reply(session_id: str = Form(...), prompt: str = Form(...)): """ @@ -220,8 +154,7 @@ async def reply(session_id: str = Form(...), prompt: str = Form(...)): audio_data = await asyncio.to_thread(t.generate_audio, result) # 假设 TTS 类有一个 generate_audio 方法返回音频数据 # 将音频数据直接上传到 OSS - await asyncio.to_thread(upload_mp3_to_oss_from_memory, tts_file, - audio_data) # 假设 upload_mp3_to_oss_from_memory 支持从内存上传 + await asyncio.to_thread(upload_mp3_to_oss_from_memory, tts_file, audio_data) logger.info(f"TTS 文件已直接上传到 OSS: {tts_file}") # 完整的 URL @@ -241,9 +174,8 @@ async def reply(session_id: str = Form(...), prompt: str = Form(...)): # 释放连接 milvus_pool.release_connection(connection) - # 运行 FastAPI 应用 if __name__ == "__main__": import uvicorn - uvicorn.run("Start:app", host="0.0.0.0", port=5800, workers=1) + uvicorn.run("Start:app", host="0.0.0.0", port=5800, workers=1) \ No newline at end of file diff --git a/AI/WxMini/Utils/EmbeddingUtil.py b/AI/WxMini/Utils/EmbeddingUtil.py new file mode 100644 index 00000000..fde367c0 --- /dev/null +++ b/AI/WxMini/Utils/EmbeddingUtil.py @@ -0,0 +1,26 @@ +import logging +import jieba +from gensim.models import KeyedVectors + +# 配置日志 +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + +# 初始化 Word2Vec 模型 +model_path = r"D:\Tencent_AILab_ChineseEmbedding\Tencent_AILab_ChineseEmbedding.txt" +model = KeyedVectors.load_word2vec_format(model_path, binary=False, limit=10000) +logger.info(f"模型加载成功,词向量维度: {model.vector_size}") + +# 将文本转换为嵌入向量 +def text_to_embedding(text): + words = jieba.lcut(text) # 使用 jieba 分词 + logger.info(f"文本: {text}, 分词结果: {words}") + embeddings = [model[word] for word in words if word in model] + logger.info(f"有效词向量数量: {len(embeddings)}") + if embeddings: + avg_embedding = sum(embeddings) / len(embeddings) + logger.info(f"生成的平均向量: {avg_embedding[:5]}...") # 打印前 5 维 + return avg_embedding + else: + logger.warning("未找到有效词,返回零向量") + return [0.0] * model.vector_size \ No newline at end of file diff --git a/AI/WxMini/Utils/MySQLUtil.py b/AI/WxMini/Utils/MySQLUtil.py new file mode 100644 index 00000000..147b8342 --- /dev/null +++ b/AI/WxMini/Utils/MySQLUtil.py @@ -0,0 +1,36 @@ +import asyncio +import logging +from aiomysql import create_pool +from WxMini.Milvus.Config.MulvusConfig import * + +# 配置日志 +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") +logger = logging.getLogger(__name__) + +# MySQL 配置 +MYSQL_CONFIG = { + "host": MYSQL_HOST, + "port": MYSQL_PORT, + "user": MYSQL_USER, + "password": MYSQL_PASSWORD, + "db": MYSQL_DB_NAME, + "minsize": 1, + "maxsize": 20, +} + + +# 初始化 MySQL 连接池 +async def init_mysql_pool(): + return await create_pool(**MYSQL_CONFIG) + + +# 保存聊天记录到 MySQL +async def save_chat_to_mysql(mysql_pool, session_id, prompt, result): + async with mysql_pool.acquire() as conn: + async with conn.cursor() as cur: + await cur.execute( + "INSERT INTO t_chat_log (session_id, user_input, model_response, create_time) VALUES (%s, %s, %s, NOW())", + (session_id, prompt, result) + ) + await conn.commit() + logger.info("用户输入和大模型反馈已记录到 MySQL 数据库。") diff --git a/AI/WxMini/Utils/__pycache__/EmbeddingUtil.cpython-310.pyc b/AI/WxMini/Utils/__pycache__/EmbeddingUtil.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..454f8a5853e3bcb12a399c98fe476a519c8a9de6 GIT binary patch literal 1265 zcmah|&2Jk;6rb4-d%d<(w~By54?Ym6rD7GhSy4olP$?n=p#YUuMys`FvRQXO!p=BN zEa%if(m+HjS`Or+&IJKNqEZnOl*0cpH&pGWIdO*zZ`Q3_!GV$H?Y?<$-n`%MHxrf1 zCW7(m-+#E4hR_4)JUw)9-hj!Lzz{^RkF4PrTNr%BR|3^iF`}rW!PJRLv>iqIgB;Bt zV#^@9RiJ9eqy>^Ad1CCSR*@Qm5-HGPr@V{yl|zgXF-dU)Sz`n*pjzn|PDArGEJ|a9yqp*Iz*E zYL=JltDsl@W9780Y3!@AUSC~65Y3UoPy-k9e8(6^ph0oIYd}Zomz(L{_l;_|0?xT= zvh@i-I=c5oda(B|FbX6{KnpP0889yDqYuq>+{YZPpjhG1_-S-Q`AAtn-y#rK4&t7x zob)}$+flGQbxO7&dpd)96P&Jcn@2W%taW|TkJ^sUrr_uCj_$LNo$g&rZ*7faOK;r) z@c_Pz9o@VQ!XACKk^Fo&+20%r+o&GPN77U4^?LWktipHcwcnDhJEKza9{heY**!Sk z{b{(#g}iLk3PSIAwB=Zu-|lje^CQ=#vCzD5F%qgzL!q*6Ao5q7I0PDEsfj4AWDmk- zUXRu^p-Ff`mjp6ljxaK-?5wy?Fh3zTF3Kt=|HH}&SZ3p3O#BQU!{b=P75qF{Qz_y2 zISABFh`el3wlW63Rd#8}yr7;*$zn;kD7GBtwWp(S(Q}11JAd&VQE(}r8$vT|TfSJ^ zj^%NdneDWf9lm6Z`;kLziQ_duXb)d*20513FP5!kZ9D}fyD=FaV-iYreL5wd7?b)N S$iO9BQBAC=6?_Vp)PDd`W_=O> literal 0 HcmV?d00001 diff --git a/AI/WxMini/Utils/__pycache__/MySQLUtil.cpython-310.pyc b/AI/WxMini/Utils/__pycache__/MySQLUtil.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..710d61e16f4c46ccd554e51b87c1d383ac466678 GIT binary patch literal 1297 zcmZWo-D?|15Z}E!NoRf7jtT8cDOZZY7hIJ>pZZ0e*l7fo6kB#hgy6W`u9R)>Zk5|R z$2JN${QyJj7MkXzlp;aWrZ2(tB@k#|`d{o@33B5mmJ~=4zUK&TAhUD~|3Wnr%YqY7 zJET^Ra~Di}Vb`2ru!R#ckD9`G&6rtHs=yzD+1rT>| z2-q+b;`{h3Tmdw$AgDyC{R|>dAVLr99f#Ss0 zcw|9|8=WM42+AGYM*}oOZ=y8W?=Iff`q}_bcwW7o>1PHC!F}9U`f5iVDrqc65UCDN zPGS^lgA7rI7^5Z>uEG3N9Y^|wH%{Wj+qWPhp4VXgKav1or?G33vGI z!>H6Y>PBceH|)@IdDmDF&~wIpi;+bm?xn^2UKB1G9w*eb0xEjk7j)4$VN=2$==$kJ`_kTKiI2`@(#pvsU;~)1& zkAHdg=y3Gwz0v-mku=6Qx_5YV@M!ep?$faP1j^f>}se&Ix;qrnrPW zCC5wPPZ+A~&EYw$VFl|r|If5m#222I{|#P-eRB@76+6DexXi^#0RvmQ&3KYf%HCH} zDweCv-HJr{68Af-Tdqd2l^U{e-4@I_F|hLZ>|c7ZnE4O%DQYMH literal 0 HcmV?d00001 diff --git a/AI/WxMini/__pycache__/Start.cpython-310.pyc b/AI/WxMini/__pycache__/Start.cpython-310.pyc index 3c11859d1a44a3a0f3111502fb1cbd9879e18792..fd3591d33010f3eb39d697be1a54afea4bb9badf 100644 GIT binary patch delta 1322 zcmY*YO>Epm6yCADwzL1)?0PqwWV3BT!Y0jDLTwQ$KcNt$s!0oK5yF5hXXkC=bZjp( zPMQdzRB9zoIiNXkfI};gP!F7f#Hk0wi6auHkvJd@CI1=&OZl} zxOH25g?=H=g~*b-1WQ{zJb@?i6mH|`14DL=Zlqrimzr{;nrCjG&(cal{UP9Jzp=6()%{IfQ31vaX3s4|I9x$w4*HltkSVWmIPst9DZB* z2ARUxW|E{2tCuAn#u&)q<7!XqIC4qaP-a3JQQCmy)H!L9=j&Zm;}`2Y{j4gm`kQgk z2PUjg?yiWxs9kouS0o@z3T&Gs{0HZ`&a$ddR2`dXmCsTblD$jPWnc4! z#wSKiuOR+?V+t|;Yh!Nf2+34erxi7ARL!vN#Pl{$);&*OOJjAJvPHV0O5RgY?nGlG zSf^TAGw|hg4g;#I8bg_s7Be0~;?XF2B_TEea!g@Ct zQx5;UdFG1qL}qR8<{PUjm&zukMzo%&Sou$&X;o2e<&Afd!(VDGqX~Yu)!c8Yj+A~< zt)$e7;@XW@S6023U%%>ITX~yamszE1c~$SKLZ)JCdnf3}G!i=9j!sdVlAQ8uKQC3Y zP2fevBU_fn&e%x31yqv)RU);QWKcjAHS`=R>4t72P2NMUtmzukkok|DJ&WuQY~*Oh G-~R$c2w!yo delta 3353 zcmZWreQXrR72nz2`|zFZJD&|UABF`3_SxnLABA#YC&q?g;SY*UP*zl@-+MFPY5n`&4y&dnB*63QfB!iXZxw{Uu*S+K2#uHEvwv}`V|&r` zt9~<}29SUSEEz$wO0D9&Y=q3P8s@yus5WcV8qWKTS~H?X%%~dW?SN5d#?%<+gGRmC zpf+&6%2+kZY*rbo`5zgLY9q9f(WEx<{~A>y#isGKq-lH|4&m^uhy>C!x1Kc5BXt9) zR2v&jMOC186WL5!$QH5{*Wg+lxgSus;V7<~7SvW8!}aiQ!wI|!ubxI~oV2SQBs|_p zI&dS|j+C7MH0Imb;J1aLcBSx8ur#mg}iLoyOk5QRR9%I*J6d`y`%4c{{li!6`i zYG7VN4j@t&9Q1Yc6;ufT-#6L<(zr+?@bjt%Umf|}Krz)jJanZ0WiDDlwlI~5p4FYo znz(N}pwnhkiI*D?g<2&nNI=pekfn!pFqK~TxD$>rj>g4;P(T>Xic?}iWNXRa1$jtO zii%RcakF&e?vvY($}{gi{lG0<{l=3wr%P8KFWmm_)9=lKj-n)U8r4jNB~Ma$PP2UX zTKWFBpHZ`N$CAp^pWiOsd$`h6`OZ5F*KSlaDRJ^Nn^cO+6P0G(csl=xOCe!p2aLoe z&o{1GjxDeej9X`AI+B@eyMbQ?&(qLGgi9u+yw&EIn*emiat7*MTWI!W<<2Jeop| zI44173-<&p77&zOnU!5EVR;&Y*C$*-QzF;#IdVa~!ZdhcE{HVg_*hS+f1NNTaXo)Q zg1Q;FDz0HRgAXKTuON*GAob%aZoL@FASW;$Y+#uk9?G-Bt`gCtl8-{8Qz^}S5j=kK zo?Cu+KdC(94g(%@X#_rdJ&=r05I!H8g8JkL69N?-Sos>^mUtdOS3y7We7qRIev640 zyrRrP_I$)b2MyhJAiZ9$c#gGpQ{2KzK-2F5d!c0)ZLda~$b91R^LthIsZALKJ3RG6Xf>)u4N$?NXf4o? zSdHi*HjAX?ay*iczc03{&GGSWkCV{IJuxjnTLO$@kRAbr--)5L~D8jdb@EjaD|zIJ09JpfYL#ZHge2gal$$d7;Itk)fZV0oKuQ>SE&{96yPZ zj&9hAVcTX5W#z$L7u$q7+&|W>cmIGQA`QDA)+O9)b(@>%IW{!Fzz-nzYIyBVES7Hf z3$b{Sg~tnOy2TIL%0U?72Vv^y@W`mQ$R8g*vCN-@n}%0g(K+5TGIDD8M4wkvY4;sU z4fPBjrmR#~Let7PR?tCZBsDnPcX%Lmyl3<%ToK?3(gk+}%{l8e@LM0tv|W&csWFS1 zngf$8jTq=3>>uT*;z>cfnZXA4pLMI;U&gj2IJ9D*-8(!maJYB0e|V^}jw5XLK1NuU zaj4T?x30dvXfq3NSF{~kWTHE)9mr4UX^TQ}p!w_ygmA2Im;?{PvhxP%W`~H4fdqvh z+(v?s)z$osa$4yFDn_Lcsuo3Agj>tf>;F$87!HO)KDj!|l2DQ*P?g0<6osN908d2z RkAnWqC^!{Ft3~;L{|B;3rIi2x