'commit'

2025-08-19 13:22:23 +08:00
parent 0b7f846637
commit 35c5abd81a
1 changed files with 183 additions and 6 deletions
--- a/dsSchoolBuddy/Start.py
+++ b/dsSchoolBuddy/Start.py
@@ -32,17 +32,153 @@ search_util = EsSearchUtil(Config.ES_CONFIG)
 # 存储对话历史的字典，键为会话ID，值为对话历史列表
 conversation_history = {}

+# In-memory cache of student profiles: user ID -> info dict (e.g. {'grade': ...}).
+student_info = {}
+
+# Grade keyword dictionary: canonical grade label -> tokens that identify it.
+# Every token appears under exactly one grade. "初一/初二/初三" are the
+# junior-secondary years (grades 7-9), so they belong only to 七/八/九年级;
+# lookups walk this dict in order and stop at the first hit, so a token listed
+# under two grades would always resolve to the earlier (primary-school) one.
+GRADE_KEYWORDS = {
+    '一年级': ['一年级'],
+    '二年级': ['二年级'],
+    '三年级': ['三年级'],
+    '四年级': ['四年级'],
+    '五年级': ['五年级'],
+    '六年级': ['六年级'],
+    '七年级': ['七年级', '初一'],
+    '八年级': ['八年级', '初二'],
+    '九年级': ['九年级', '初三'],
+    '高一': ['高一'],
+    '高二': ['高二'],
+    '高三': ['高三'],
+}
+
 # 最大对话历史轮数
 MAX_HISTORY_ROUNDS = 10


+# Helper: persist one student's profile document to Elasticsearch.
+def save_student_info_to_es(user_id, info):
+    """Store ``info`` for ``user_id`` in the ``student_info`` index.
+
+    The document is indexed under the id ``student_<user_id>`` together with
+    the user id and a local-time ``update_time`` stamp. The index is created
+    with an explicit mapping when it is found missing.
+
+    Args:
+        user_id: Identifier of the student; used to build the document id.
+        info: Student attributes to store under the ``info`` field.
+
+    Returns:
+        None. Any exception is logged and swallowed, so callers are not
+        interrupted by a storage error.
+    """
+    index_name = "student_info"
+    try:
+        document_id = f"student_{user_id}"
+        timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+        payload = {"user_id": user_id, "info": info, "update_time": timestamp}
+        index_definition = {
+            "mappings": {
+                "properties": {
+                    "user_id": {"type": "keyword"},
+                    "info": {"type": "object"},
+                    "update_time": {"type": "date", "format": "yyyy-MM-dd HH:mm:ss"},
+                }
+            }
+        }
+
+        # Borrow a connection from the pool for the duration of the write.
+        pool = search_util.es_pool
+        client = pool.get_connection()
+        try:
+            index_missing = not client.indices.exists(index=index_name)
+            if index_missing:
+                client.indices.create(index=index_name, body=index_definition)
+                logger.info("创建student_info索引成功")
+            client.index(index=index_name, id=document_id, document=payload)
+            logger.info(f"学生 {user_id} 的信息已保存到ES: {info}")
+        finally:
+            # Always hand the connection back, even when the write failed.
+            pool.release_connection(client)
+    except Exception as exc:
+        logger.error(f"保存学生信息到ES失败: {exc}", exc_info=True)
+
+# Helper: load one student's profile document from Elasticsearch.
+def get_student_info_from_es(user_id):
+    """Fetch the stored ``info`` for ``user_id`` from the ``student_info`` index.
+
+    Args:
+        user_id: Identifier of the student; the document id is
+            ``student_<user_id>``.
+
+    Returns:
+        The ``info`` value of the stored document, or an empty dict when the
+        index or the document does not exist or the lookup fails. Exceptions
+        are logged and never propagated.
+    """
+    try:
+        doc_id = f"student_{user_id}"
+        # Borrow a connection from the pool for the duration of the lookup.
+        es_conn = search_util.es_pool.get_connection()
+        try:
+            if not es_conn.indices.exists(index="student_info"):
+                logger.info("student_info索引不存在")
+                return {}
+            result = es_conn.get(index="student_info", id=doc_id)
+            if result and '_source' in result:
+                info = result['_source']['info']
+                logger.info(f"从ES获取到学生 {user_id} 的信息: {info}")
+                return info
+            logger.info(f"ES中没有找到学生 {user_id} 的信息")
+        finally:
+            # Always hand the connection back to the pool.
+            search_util.es_pool.release_connection(es_conn)
+    except Exception as e:
+        # A missing document is the normal case for a first-time user, not an
+        # error. The message of a document-level 404 does not reliably contain
+        # "not_found", so check the HTTP status the client error carries
+        # (``status_code`` on elasticsearch-py API errors) first and keep the
+        # original substring test as a fallback.
+        is_missing = (
+            getattr(e, "status_code", None) == 404
+            or "not_found" in str(e).lower()
+        )
+        if is_missing:
+            logger.info(f"学生 {user_id} 的信息在ES中不存在")
+            return {}
+        logger.error(f"从ES获取学生信息失败: {str(e)}", exc_info=True)
+    return {}
+
+
+# Arabic-numeral grade numbers ("1".."9") -> the canonical labels used as
+# GRADE_KEYWORDS keys, so "我是7年级" is stored as "七年级" rather than "7年级".
+_ARABIC_GRADE_LABELS = {
+    str(number): f"{numeral}年级"
+    for number, numeral in enumerate("一二三四五六七八九", start=1)
+}
+
+
+def _record_grade(user_id, grade, log_prefix):
+    """Cache ``grade`` for ``user_id`` and persist it to ES, only if it changed."""
+    profile = student_info[user_id]
+    if profile.get('grade') == grade:
+        return
+    profile['grade'] = grade
+    logger.info(f"{log_prefix} {user_id} 的年级信息: {grade}")
+    save_student_info_to_es(user_id, profile)
+
+
+def extract_student_info(text, user_id):
+    """Extract the student's grade from ``text`` and cache/persist it.
+
+    The text is segmented with jieba (precise mode) and the tokens are matched
+    against ``GRADE_KEYWORDS``; the first grade whose keyword appears wins.
+    When no keyword matches, the pattern "我是<digits>年级" is tried and digits
+    1-9 are normalised to the canonical Chinese label (other numbers keep the
+    raw "<digits>年级" form). A changed grade is written to ``student_info``
+    and saved to Elasticsearch.
+
+    Args:
+        text: Free-form message to scan for a grade mention.
+        user_id: Identifier of the student whose profile is updated.
+
+    Returns:
+        None. Any exception is logged and swallowed.
+    """
+    import re  # local import: keeps this block self-contained
+
+    try:
+        tokens = set(jieba.cut(text, cut_all=False))  # precise mode
+
+        # Make sure a profile exists in the cache, loading it from ES if needed.
+        if user_id not in student_info:
+            info_from_es = get_student_info_from_es(user_id)
+            if info_from_es:
+                student_info[user_id] = info_from_es
+                logger.info(f"从ES加载用户 {user_id} 的信息: {info_from_es}")
+            else:
+                student_info[user_id] = {}
+
+        # First grade (in dict order) with any keyword among the tokens.
+        keyword_grade = next(
+            (
+                grade
+                for grade, keywords in GRADE_KEYWORDS.items()
+                if any(keyword in tokens for keyword in keywords)
+            ),
+            None,
+        )
+        if keyword_grade is not None:
+            _record_grade(user_id, keyword_grade, "提取到用户")
+            return
+
+        # Fallback: an explicit "我是X年级" statement written with digits.
+        match = re.search(r'我是(\d+)年级', text)
+        if match:
+            digits = match.group(1)
+            grade = _ARABIC_GRADE_LABELS.get(digits, f"{digits}年级")
+            _record_grade(user_id, grade, "通过正则提取到用户")
+    except Exception as e:
+        logger.error(f"提取学生信息失败: {str(e)}", exc_info=True)
+
+
 def get_system_prompt():
    """获取系统提示"""
    return """
    你是一位平易近人且教学方法灵活的教师，通过引导学生自主学习来帮助他们掌握知识。

    严格遵循以下教学规则：
-    1. 首先了解学生情况：在开始讲解前，询问学生的年级水平和对询问知识的了解程度。
+    1. 基于学生情况调整教学：如果已了解学生的年级水平和知识背景，应基于此调整教学内容和难度。
    2. 基于现有知识构建：将新思想与学生已有的知识联系起来。
    3. 引导而非灌输：使用问题、提示和小步骤，让学生自己发现答案。
    4. 检查和强化：在讲解难点后，确认学生能够重述或应用这些概念。
@@ -72,11 +208,27 @@ async def chat(request: fastapi.Request):
        if not query:
            raise HTTPException(status_code=400, detail="查询内容不能为空")

-        # 1. 初始化会话历史
+        # 1. 初始化会话历史和学生信息
        if session_id not in conversation_history:
            conversation_history[session_id] = []

-        # 2. 为用户查询生成标签并存储到ES
+        # 检查是否已有学生信息，如果没有则从ES加载
+        if user_id not in student_info:
+            # 从ES加载学生信息
+            info_from_es = get_student_info_from_es(user_id)
+            if info_from_es:
+                student_info[user_id] = info_from_es
+                logger.info(f"从ES加载用户 {user_id} 的信息: {info_from_es}")
+            else:
+                student_info[user_id] = {}
+
+        # 2. 使用jieba分词提取学生信息
+        extract_student_info(query, user_id)
+
+        # 输出调试信息
+        logger.info(f"当前学生信息: {student_info.get(user_id, {})}")
+
+        # 为用户查询生成标签并存储到ES
        current_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        tags = [user_id, f"time:{current_time.split()[0]}", f"session:{session_id}"]

@@ -110,15 +262,39 @@ async def chat(request: fastapi.Request):
                    history_context += f"[对话 {i}] 用户: {user_msg}\n"
                    history_context += f"[对话 {i}] 老师: {ai_msg}\n"

-        # 4. 构建提示词
+        # 4. 构建学生信息上下文
+        student_context = ""
+        if user_id in student_info and student_info[user_id]:
+            student_context = "\n\n学生基础信息：\n"
+            for key, value in student_info[user_id].items():
+                if key == 'grade':
+                    student_context += f"- 年级: {value}\n"
+                else:
+                    student_context += f"- {key}: {value}\n"
+
+        # 5. 构建提示词
        system_prompt = get_system_prompt()

-        # 5. 流式调用大模型生成回答
+        # 添加学生信息到系统提示词
+        if user_id in student_info and student_info[user_id]:
+            student_info_str = "\n\n学生基础信息:\n"
+            for key, value in student_info[user_id].items():
+                if key == 'grade':
+                    student_info_str += f"- 年级: {value}\n"
+                else:
+                    student_info_str += f"- {key}: {value}\n"
+            system_prompt += student_info_str
+
+        # 6. 流式调用大模型生成回答
        async def generate_response_stream():
            try:
                # 构建消息列表
                messages = [{'role': 'system', 'content': system_prompt.strip()}] 

+                # 添加学生信息（如果有）
+                if student_context:
+                    messages.append({'role': 'user', 'content': student_context.strip()})
+
                # 添加历史对话（如果有）
                if history_context:
                    messages.append({'role': 'user', 'content': history_context.strip()})
@@ -143,6 +319,7 @@ async def chat(request: fastapi.Request):
                # 保存回答到ES和对话历史
                if full_answer:
                    answer_text = ''.join(full_answer)
+                    extract_student_info(answer_text, user_id)
                    try:
                        # 为回答添加标签
                        answer_tags = [f"{user_id}_answer", f"time:{current_time.split()[0]}", f"session:{session_id}"]