From 28f51a8e25d0b3d9f45a3340ac1e22c5a3efabe0 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Tue, 19 Aug 2025 13:41:20 +0800 Subject: [PATCH] 'commit' --- .../Config/__pycache__/Config.cpython-310.pyc | Bin 852 -> 852 bytes .../ElasticSearch/Utils/EsSearchUtil.py | 129 ++++++++++++++ .../__pycache__/EsSearchUtil.cpython-310.pyc | Bin 12791 -> 15868 bytes dsSchoolBuddy/Start.py | 164 ++---------------- 4 files changed, 147 insertions(+), 146 deletions(-) diff --git a/dsSchoolBuddy/Config/__pycache__/Config.cpython-310.pyc b/dsSchoolBuddy/Config/__pycache__/Config.cpython-310.pyc index 22c831f6d96cb94f07dd73765e83b5549a1ef46d..e1f95488be20eb743d1e832fbca43079cf56dabd 100644 GIT binary patch delta 101 zcmcb@c7=^MpO=@50SMxFmt@?W$h(a3*2FC~j8PjuZfA73#qa178Smrc8}At48Sm{H rc}oB$;N%$W8Xp?ub4v&&;_L6?>J#th=Jjdu+2jQ4hp rj1qteI5`Hp#)k&^L8QpW;ln@3I3#x$lQDms=*>p+*aCy}FB~UCxJ#fx8agVdG;~DN7oGaeSISW^ZYw<2Lqre^K_^tu z#hjo^@W_-rs10tR{s)!+%j6XLyL-W%b)QQ*eliNljP8_^Y72kWe-4Mzn% zK)2F)wCO0(HM)(?r_DeG%k&PqkS+o;MDL`FjWAteL}(J8rAD+zqpkGDqlz8_(PhxL z+=v1{-lNhLbS3Z;bQN%Mq)bzE^-)=`}b+vrA^YysUw zH^X`t(k=9M_+3Oh=!f99g*J8zsZKA_X>}VJmhIbX=gjhZLD>8&mmaOW@!Hilzg$wG zRT(`848P;e$7glC`FK_0#H@}>CwNEYx##?js{HAiR^`(dYFbw=9jUxg##w#APrqE- z`Np|w3oUy0;UW>Ha>vESW-*!)liN108eFxRWfbXg`8Qog>B=oo%WTVhJI@?KuDB{ENWz8UhD;u zn9-I*1s(49p26Ae;>6sV~^)#4npjd!2m}|ngWtgyo>5OTnX~xc+cW+JXBI_oO zC*C3IgE+w_+@1A(x1gOV!ZP?Hdg7XOo?;D{IeYd3LR-w7WoL`|fsS9gSL-t^ooIt( z8iiEA-;H&sw5pjlEKkhZfu^S#n#gzEiw$>?jT3)qsEGVHy7Wk|Q7~A>Oydsy&HZE3 zqQ#zeAY&Qn5;MI(W+0b7XbcT2CA+8nwwFXWvDNdprEWueW$s1%D7~Pa%Nv%R$q%qi z(DxD?E!s2So}S-IcDk?6zcW02%B;^-nvbb>;($)~RP($}9P6oOF`F^1nPX4I{gvf# zAUZ57A>d(ix77Vp&GwZF2Q9|jKQy;)z{x#{7PE7nN>dKc%TAlcLN8x%+AgLI>(}m` z3mzr+yWd&xTY3-_z?LdFNdRFO;gbj$^UMLz!#t?c50?x!1lusM>bg>O z3_X#JxIIhPlOFfX(j93p!7@2;pq?U2gG<;Yi{bisD8u-f-Qo*)2D(A1 zJBq_`AM`LRCQ~@bV~CwWw(f3i-JIHsTo_S?Z*Y`;h02cP z$W%Eg5y4i+0#ro}G~kG!N5)(0K7q?;25pE1#dQXa|=mCL-N7P4D)ZkNx$RQyWDJ=)>;{^pMgh7!}}enU+kA`xnCJmsW?ukbds=Lz*T2> zj!5N`B6y2tsr%Q#)ok$l(e7Px4TJ8kG3 z=N96e7oK+Bn(;ZI+jr<$JidskR_@QCq4VI+3i$I)Gy=Y?93f+oabZv>vnRRwD4#Os zPniRyD!O-kM0rF(qxz#X;frBB(mGl{9Rhp|8sd-UA<$52g^j-Q%_nNSJ==HfiPtMn zK2sYN?Cmm)^XjiV_*{V+huIeBU_}JX!KHZssIR-KWb|S+2HuN6&rA$#N^gQ`?HRuf@&0Ec1zt0{@R*v}Pp7;v6q z77rPWN8r@Wng<`|WcDqD|61Fd@!iBK*M?jVE9T(_*NH)Q3kXBd5ePfdBsP%kk{)-rq zmu#UQvOj(^fasEJ(UFc2h%(WZ1_^6(gfXDU$%x`8Y^9^vk|S;rY>ZQ&Amhu9G8JzH zIVbr2l#Hn8?c>6B;lVBY#gV{BS#vaKtRD$FK_0w)lq*{x3=(T6ib1GCxH^G@5~EIF zic_6T<&y=Rz|>@5SP+dwxpAlxRKqG#AP?W_s4UN^^^R&?#42KjMnYvLG!~{|hm4CO zVJFNcobXtLMqp(TCj#q;LiG#Om~?u|fT~H~c%MnJ|7FrXfrc<`a4gp}pzcM7egSzR z%IlueAER+Q?nJ9~Pos(Hpbw2iABmy@$|)Z7b)_5O2(P?-`ReHx`*X&DOl9=gwO39~ zKKZ#>QU77Mrv$Pk{ytYO)uRx`O64+gYV^<5t*FLu}NZQDE3J3AlPw`=$I zZcoYCM&4rOZ)2wj_iViTdP*-VmInL_e>gvYkzV~rwtXyp_m1v8yLLa2-o0yY*Y_q~NQ9x>zY@KdDt*3Q6c>d6iHBJ9XPPnJmHQv0XLtCshM_RaWs)c<4M7@An?Cpi`MQkaw*dqx1 zLic^7o_Bw^?g{v~wSWDF-k%`@>k2xPeXlWSn4Y|&>%+Uay19&mH;H_amQ14q_t|O% Z&;&eb{tsRd5hHNs{(mDBjtHrj{sSOFJtP1C delta 2660 zcma)8YiwLc6~42NyD#t4yY{ZVsUNWu->jWFf`o?J#-Wcir8rTOC}n7Ld%ZJWZ*w1; zxp&jVDL4vlM5U-C9Z?}#q?VVcKp?S|`VUd@6Obw-_;G*nrz-s5SA>uNA=5|} zyV~!bIdjgL^O|$_-`D@q%z4>tT7%E0nIEpdwsJRDCm-Bx9NUc8x|5&@qA`s{?WB{U zDMiQZw3DG3MaS&{Crh)6*6o~=r+Gyu?1EFE1w|+AqEn(JXOIp$Wm+a0C+}AH(0haq z^Wk@^oV;9nj}Yyo#!@W3rLpukB6Nh2yMOt~Cc3o@KmVTqT%&|(G{OjrY-u#g4zU=E zZxI?}4VGX@pyF(nrCA0joz1ZvPp~{svI6{yJhhf&B{sMfr)khE!(N4_fS*~@*$^8B z{s5~27bRn?#zwYcn>4Ffqe?IZf_1iUD@Jp0;3L2r+cvDTaW(-4^6W4}+5J4jiYyB9 zQ*0Xe1$KZJd5I75@>-;()iieSp>^^8nkUwxC;_@NtY*Mr6&5|K8IOhoPx)uLC7SE! zgSOo(#6BhY9Ari05efiQ>iX-XB=6`W$qKCB*Tks&rGAc7dWpovXqqOvJ{M-2QR$`| zgv#%x50G*BQF^g4jyA>sjA)qR&MIfD?Y6>nU%~Bk12KcUL`+K~Gef3(&u4rxJ`B7t z*W!WM^jw!WgSO|2656UF)Z`}vZG8mEl02848XrYcK^>?tFZgP=Z8OucIvqH#A-|Ko zo%kyjy&08XNL1xgZmW#;!nk7v&2@1IXd?`*BU?$*K1G8^@qjx#~0Vyn_n9^2p&dQ_3sb{|k z%=_kdEDID3lVHB%xsdq14#@l5hP0WMZJW#rtlRPj#nWW6cdz&>qCba6eOZ1uxc*u6 z3zB~ZACWL}cs`8#9lISo>~MO)Zu>#gb2>{O$+Kmv{u%TkYID~}^rN3dquZ80E>CA3 zi1^LQDEXa@*E{Mtsm z8?4PeW<;PkZ&gni2hr7->KawGFcq{N?gy6B5eDu-715QizFC_j$L0Ro6PcZQiVN~` z?bb_2a32GpdDW68^O{|UyMeD%L*4e8mhJBr+{pK1d;t})UxkT)Wma8D`}J(Dcil_A zxF|1-OddmD!YEkYvpz#@tXyE)p4(FKn}KI?|2Og%BR9yhoFD!3#84O!Nsfc=M!hKpng^S{t2bzIkEccSSD&i)xPsyL{f82Ny$-VAZZd+m7)Nxsux=H+= zocb40-AxDlrf7HIj;lnSl;1zFGI<)Tg?h&lmg7S|FRNu=PCSDr&C1fjTbsM>s>=Qn z?mU5jEi29=?2IBK*$>o5?p=hPrWB`ese<1tfqbmXt+%VYoB`C-=KqO~LVcOvi2FHRa{1%~btD8b`td_=^T z&;*nKSm%r(dHwOJO|cJoiwI94Jd1$Y3!`l}2xB;s#RXi)Rz1%a0`~)iI|$!KcpD*! zkV3#$U>IMsJu47DLdLGUimD?o15nnqCpEtcR8mWl^icYAx>O&lR!i1UrFyD*ba_dM z8KdGh$b<>oYqj7_D+bZPI)a7py8L+I4fsc3E*{f=jEt&$d$IQP9;ZS%9FKKvzJy8e W#{uxR!G8 diff --git a/dsSchoolBuddy/Start.py b/dsSchoolBuddy/Start.py index 47f80c7f..d2c9d485 100644 --- a/dsSchoolBuddy/Start.py +++ b/dsSchoolBuddy/Start.py @@ -29,134 +29,6 @@ client = AsyncOpenAI( # 初始化 ElasticSearch 工具 search_util = EsSearchUtil(Config.ES_CONFIG) -# 存储对话历史的字典,键为会话ID,值为对话历史列表 -conversation_history = {} - -# 存储学生信息的字典,键为用户ID,值为学生信息 -student_info = {} - -# 年级关键词词典 -GRADE_KEYWORDS = { - '一年级': ['一年级', '初一'], - '二年级': ['二年级', '初二'], - '三年级': ['三年级', '初三'], - '四年级': ['四年级'], - '五年级': ['五年级'], - '六年级': ['六年级'], - '七年级': ['七年级', '初一'], - '八年级': ['八年级', '初二'], - '九年级': ['九年级', '初三'], - '高一': ['高一'], - '高二': ['高二'], - '高三': ['高三'] -} - -# 最大对话历史轮数 -MAX_HISTORY_ROUNDS = 10 - - -# 添加函数:保存学生信息到ES -def save_student_info_to_es(user_id, info): - """将学生信息保存到Elasticsearch""" - try: - # 使用用户ID作为文档ID - doc_id = f"student_{user_id}" - # 准备文档内容 - doc = { - "user_id": user_id, - "info": info, - "update_time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) - } - # 从连接池获取连接 - es_conn = search_util.es_pool.get_connection() - try: - # 确保索引存在,如果不存在则创建 - es_conn.index(index="student_info", id=doc_id, document=doc) - logger.info(f"学生 {user_id} 的信息已保存到ES: {info}") - finally: - # 释放连接回连接池 - search_util.es_pool.release_connection(es_conn) - except Exception as e: - logger.error(f"保存学生信息到ES失败: {str(e)}", exc_info=True) - -# 添加函数:从ES获取学生信息 -def get_student_info_from_es(user_id): - """从Elasticsearch获取学生信息""" - try: - doc_id = f"student_{user_id}" - # 从连接池获取连接 - es_conn = search_util.es_pool.get_connection() - try: - # 确保索引存在 - if es_conn.indices.exists(index=Config.ES_CONFIG.get("student_info_index")): - result = es_conn.get(index=Config.ES_CONFIG.get("student_info_index"), id=doc_id) - if result and '_source' in result: - logger.info(f"从ES获取到学生 {user_id} 的信息: {result['_source']['info']}") - return result['_source']['info'] - else: - logger.info(f"ES中没有找到学生 {user_id} 的信息") - else: - logger.info("student_info索引不存在") - finally: - # 释放连接回连接池 - search_util.es_pool.release_connection(es_conn) - except Exception as e: - # 如果文档不存在,返回空字典 - if "not_found" in str(e).lower(): - logger.info(f"学生 {user_id} 的信息在ES中不存在") - return {} - logger.error(f"从ES获取学生信息失败: {str(e)}", exc_info=True) - return {} - - -def extract_student_info(text, user_id): - """使用jieba分词提取学生信息""" - try: - # 提取年级信息 - seg_list = jieba.cut(text, cut_all=False) # 精确模式 - seg_set = set(seg_list) - - # 检查是否已有学生信息,如果没有则从ES加载 - if user_id not in student_info: - # 从ES加载学生信息 - info_from_es = get_student_info_from_es(user_id) - if info_from_es: - student_info[user_id] = info_from_es - logger.info(f"从ES加载用户 {user_id} 的信息: {info_from_es}") - else: - student_info[user_id] = {} - - # 提取并更新年级信息 - grade_found = False - for grade, keywords in GRADE_KEYWORDS.items(): - for keyword in keywords: - if keyword in seg_set: - if 'grade' not in student_info[user_id] or student_info[user_id]['grade'] != grade: - student_info[user_id]['grade'] = grade - logger.info(f"提取到用户 {user_id} 的年级信息: {grade}") - # 保存到ES - save_student_info_to_es(user_id, student_info[user_id]) - grade_found = True - break - if grade_found: - break - - # 如果文本中明确提到年级,但没有匹配到关键词,尝试直接提取数字 - if not grade_found: - import re - # 匹配"我是X年级"格式 - match = re.search(r'我是(\d+)年级', text) - if match: - grade_num = match.group(1) - grade = f"{grade_num}年级" - if 'grade' not in student_info[user_id] or student_info[user_id]['grade'] != grade: - student_info[user_id]['grade'] = grade - logger.info(f"通过正则提取到用户 {user_id} 的年级信息: {grade}") - # 保存到ES - save_student_info_to_es(user_id, student_info[user_id]) - except Exception as e: - logger.error(f"提取学生信息失败: {str(e)}", exc_info=True) - def get_system_prompt(): """获取系统提示""" @@ -195,24 +67,24 @@ async def chat(request: fastapi.Request): raise HTTPException(status_code=400, detail="查询内容不能为空") # 1. 初始化会话历史和学生信息 - if session_id not in conversation_history: - conversation_history[session_id] = [] + if session_id not in search_util.conversation_history: + search_util.conversation_history[session_id] = [] # 检查是否已有学生信息,如果没有则从ES加载 - if user_id not in student_info: + if user_id not in search_util.student_info: # 从ES加载学生信息 - info_from_es = get_student_info_from_es(user_id) + info_from_es = search_util.get_student_info_from_es(user_id) if info_from_es: - student_info[user_id] = info_from_es + search_util.student_info[user_id] = info_from_es logger.info(f"从ES加载用户 {user_id} 的信息: {info_from_es}") else: - student_info[user_id] = {} + search_util.student_info[user_id] = {} # 2. 使用jieba分词提取学生信息 - extract_student_info(query, user_id) + search_util.extract_student_info(query, user_id) # 输出调试信息 - logger.info(f"当前学生信息: {student_info.get(user_id, {})}") + logger.info(f"当前学生信息: {search_util.student_info.get(user_id, {})}") # 为用户查询生成标签并存储到ES current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) @@ -239,9 +111,9 @@ async def chat(request: fastapi.Request): # 3. 构建对话历史上下文 history_context = "" - if include_history and session_id in conversation_history: + if include_history and session_id in search_util.conversation_history: # 获取最近的几次对话历史 - recent_history = conversation_history[session_id][-MAX_HISTORY_ROUNDS:] + recent_history = search_util.conversation_history[session_id][-search_util.MAX_HISTORY_ROUNDS:] if recent_history: history_context = "\n\n以下是最近的对话历史,可供参考:\n" for i, (user_msg, ai_msg) in enumerate(recent_history, 1): @@ -250,9 +122,9 @@ async def chat(request: fastapi.Request): # 4. 构建学生信息上下文 student_context = "" - if user_id in student_info and student_info[user_id]: + if user_id in search_util.student_info and search_util.student_info[user_id]: student_context = "\n\n学生基础信息:\n" - for key, value in student_info[user_id].items(): + for key, value in search_util.student_info[user_id].items(): if key == 'grade': student_context += f"- 年级: {value}\n" else: @@ -262,9 +134,9 @@ async def chat(request: fastapi.Request): system_prompt = get_system_prompt() # 添加学生信息到系统提示词 - if user_id in student_info and student_info[user_id]: + if user_id in search_util.student_info and search_util.student_info[user_id]: student_info_str = "\n\n学生基础信息:\n" - for key, value in student_info[user_id].items(): + for key, value in search_util.student_info[user_id].items(): if key == 'grade': student_info_str += f"- 年级: {value}\n" else: @@ -305,7 +177,7 @@ async def chat(request: fastapi.Request): # 保存回答到ES和对话历史 if full_answer: answer_text = ''.join(full_answer) - extract_student_info(answer_text, user_id) + search_util.extract_student_info(answer_text, user_id) try: # 为回答添加标签 answer_tags = [f"{user_id}_answer", f"time:{current_time.split()[0]}", f"session:{session_id}"] @@ -321,10 +193,10 @@ async def chat(request: fastapi.Request): logger.info(f"用户 {user_id} 的回答已存储到ES") # 更新对话历史 - conversation_history[session_id].append((query, answer_text)) + search_util.conversation_history[session_id].append((query, answer_text)) # 保持历史记录不超过最大轮数 - if len(conversation_history[session_id]) > MAX_HISTORY_ROUNDS: - conversation_history[session_id].pop(0) + if len(search_util.conversation_history[session_id]) > search_util.MAX_HISTORY_ROUNDS: + search_util.conversation_history[session_id].pop(0) except Exception as e: logger.error(f"存储回答到ES失败: {str(e)}")