# -*- coding: utf-8 -*-
"""
Math Problem Analysis System v5.0 (offline-capable edition)

Features:
1. Local rule engine first, with optional LLM enhancement
2. Automatic Neo4j data cleanup
3. Robust error handling
4. Detailed run logging
"""
import hashlib
import json
import re
from typing import Dict, List

import jieba
import requests
from py2neo import Graph

# Config.py must provide MODEL_API_URL, MODEL_API_KEY, MODEL_NAME,
# NEO4J_URI and NEO4J_AUTH (none of them are defined in this file)
from Config import *

# Initialize the tokenizer eagerly so the first analysis is not slowed down
jieba.initialize()


# ================== Configuration ==================
class Config:
    """Runtime switches. This class deliberately reuses the name of the
    Config module imported above; `from Config import *` does not bind the
    name `Config` itself, so there is no clash, but keep the two in sync."""
    LLM_ENABLED = True   # set to True to enable the LLM
    LLM_TIMEOUT = 10     # seconds

    # System parameters
    MAX_CONTENT_LENGTH = 500


# ================== Knowledge base ==================
class LocalKnowledgeBase:
    """Local rule-based knowledge base. Keywords, knowledge-point and
    literacy-point names stay in Chinese because they are matched against
    Chinese problem text and Chinese graph data."""

    RULES = {
        'arithmetic': {
            'name': '四则运算',
            'keywords': ['买', '卖', '元', '还剩', '单价', '总价'],
            'knowledge': ['四则运算应用(购物问题)'],
            'literacy': ['数感培养']
        },
        'travel': {
            'name': '行程问题',
            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
            'knowledge': ['相遇问题解决方案'],
            'literacy': ['空间观念']
        },
        'work': {
            'name': '工程问题',
            'keywords': ['合作', '效率', '工期', '完成', '单独'],
            'knowledge': ['工程合作效率计算'],
            'literacy': ['模型思想']
        },
        'geometry': {
            'name': '几何问题',
            'keywords': ['面积', '周长', '体积', '平方', '立方'],
            'knowledge': ['几何图形面积计算'],
            'literacy': ['空间观念']
        },
        'ratio': {
            'name': '比例问题',
            'keywords': ['百分比', '浓度', '稀释', '配比'],
            'knowledge': ['浓度问题配比计算'],
            'literacy': ['数据分析']
        }
    }

    @classmethod
    def analyze(cls, content: str) -> dict:
        """Rule-based local analysis."""
        result = {
            'problem_types': [],
            'knowledge_points': [],
            'literacy_points': []
        }
        words = set(jieba.cut(content))
        for ptype, config in cls.RULES.items():
            matches = words & set(config['keywords'])
            # Require at least two keyword hits to reduce false positives
            if len(matches) >= 2:
                result['problem_types'].append(ptype)
                result['knowledge_points'].extend(config['knowledge'])
                result['literacy_points'].extend(config['literacy'])
        return result


# ================== LLM module ==================
class LLMClient:
    """Client for the (optional) LLM service."""

    def __init__(self):
        self.enabled = Config.LLM_ENABLED
        self.base_url = MODEL_API_URL
        self.headers = {
            "Authorization": f"Bearer {MODEL_API_KEY}",
            "Content-Type": "application/json"
        }

    def analyze_problem(self, content: str) -> dict:
        """Optional LLM-backed analysis; returns {} when disabled or on error."""
        if not self.enabled:
            return {}
        try:
            payload = {
                "model": MODEL_NAME,
                "messages": [{
                    "role": "user",
                    # Ask explicitly for JSON so _parse_response can decode it;
                    # the prompt stays in Chinese since the problems are Chinese
                    "content": (
                        "分析数学题目,只返回一个JSON对象,字段为"
                        "problem_types、knowledge_points、literacy_points"
                        f"(均为字符串数组):{content}"
                    )
                }],
                "temperature": 0.3,
                "max_tokens": 300
            }
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=Config.LLM_TIMEOUT
            )
            response.raise_for_status()
            return self._parse_response(response.json())
        except Exception as e:
            print(f"⚠️ LLM analysis failed: {e}")
            return {}

    def _parse_response(self, data: dict) -> dict:
        """Parse the LLM response; fall back to {} on any malformed payload."""
        try:
            content = data['choices'][0]['message']['content']
            return json.loads(content)
        except (KeyError, IndexError, TypeError, json.JSONDecodeError):
            return {}
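# Expected model output -- a sketch, assuming the model honours the JSON-only
# instruction in analyze_problem (the keys mirror what ProblemAnalyzer.analyze
# merges; the values are purely illustrative):
#
#   {
#       "problem_types": ["arithmetic"],
#       "knowledge_points": ["四则运算应用(购物问题)"],
#       "literacy_points": ["数感培养"]
#   }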
# ================== Knowledge graph module ==================
class KnowledgeManager:
    """Knowledge graph manager."""

    def __init__(self):
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
        self._clean_data()
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()

    def _clean_data(self):
        """Data cleanup. The id check is restricted to labels that are
        supposed to carry an `id` property; a bare `n.id IS NULL` would
        also wipe every LiteracyNode, which uses `value`/`title` instead."""
        self.graph.run("""
            MATCH (n)
            WHERE n.name CONTAINS '测试'
               OR ((n:Question OR n:KnowledgePoint) AND n.id IS NULL)
            DETACH DELETE n
        """)

    def _load_knowledge(self) -> Dict[str, str]:
        """Load knowledge points as {id: name}."""
        result = self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")
        return {rec['n.id']: rec['n.name'] for rec in result}

    def _load_literacy(self) -> Dict[str, str]:
        """Load literacy points as {value: title}."""
        result = self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")
        return {rec['n.value']: rec['n.title'] for rec in result}

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
        """Persist an analysis result. All queries are parameterised:
        interpolating user text into Cypher invites injection and breaks
        on quotes."""
        try:
            # Create/update the question node. MERGE on id only, then SET
            # the content, so a re-run with slightly different content
            # updates the node in place instead of duplicating it.
            self.graph.run(
                "MERGE (q:Question {id: $qid}) SET q.content = $content",
                qid=question_id, content=content
            )
            # Link knowledge points (MATCH q first so it is bound before use)
            for kp_name in knowledge:
                kp_id = next((k for k, v in self.knowledge_map.items()
                              if v == kp_name), None)
                if kp_id:
                    self.graph.run(
                        """
                        MATCH (q:Question {id: $qid})
                        MERGE (kp:KnowledgePoint {id: $kpid})
                        MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)
                        """,
                        qid=question_id, kpid=kp_id
                    )
            # Link literacy points
            for lit_name in literacy:
                lit_id = next((k for k, v in self.literacy_map.items()
                               if v == lit_name), None)
                if lit_id:
                    self.graph.run(
                        """
                        MATCH (q:Question {id: $qid})
                        MERGE (lp:LiteracyNode {value: $lid})
                        MERGE (q)-[:DEVELOPS_LITERACY]->(lp)
                        """,
                        qid=question_id, lid=lit_id
                    )
        except Exception as e:
            print(f"❌ Storage failed: {e}")


# ================== Core logic ==================
class ProblemAnalyzer:
    """Problem analysis engine."""

    def __init__(self, content: str):
        self.original = content
        self.content = self._preprocess(content)
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
        self.llm = LLMClient()

    def _preprocess(self, text: str) -> str:
        """Strip everything except word characters and CJK, then truncate."""
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]

    def analyze(self) -> dict:
        """Run the analysis pipeline."""
        # Local rule-based analysis
        local_result = LocalKnowledgeBase.analyze(self.content)

        # Optional LLM enhancement
        llm_result = self.llm.analyze_problem(self.original)

        # Merge the two results. dict.fromkeys dedupes while keeping
        # insertion order; a plain set() would make the truncation below
        # nondeterministic across runs.
        def merge(key: str, limit: int) -> List[str]:
            combined = local_result.get(key, []) + llm_result.get(key, [])
            return list(dict.fromkeys(combined))[:limit]

        return {
            "problem_id": self.question_id,
            "problem_types": merge('problem_types', 3),
            "knowledge_points": merge('knowledge_points', 2),
            "literacy_points": merge('literacy_points', 2)
        }

    def execute(self):
        """Run the full pipeline: analyze, report, store."""
        print(f"\n🔍 Analyzing problem: {self.original[:50]}...")
        analysis = self.analyze()

        print("\n📊 Analysis report:")
        print(f"  Problem types:    {analysis.get('problem_types', [])}")
        print(f"  Knowledge points: {analysis.get('knowledge_points', [])}")
        print(f"  Literacy points:  {analysis.get('literacy_points', [])}")

        # Persist to the knowledge graph
        self.kg.store_analysis(
            question_id=analysis['problem_id'],
            content=self.content,
            knowledge=analysis.get('knowledge_points', []),
            literacy=analysis.get('literacy_points', [])
        )
        print("✅ Data stored")


# ================== Test cases ==================
if __name__ == '__main__':
    # The test problems stay in Chinese: the rule keywords and jieba
    # segmentation both operate on Chinese text
    test_cases = [
        "小明用50元买了3本笔记本,每本8元,还剩多少钱?",
        "甲乙两车相距300公里,甲车速度60km/h,乙车40km/h,几小时后相遇?",
        "一项工程甲队单独做需要10天,乙队需要15天,两队合作需要多少天?",
        "一个长方形长8cm,宽5cm,求面积和周长",
        "含盐20%的盐水500克,要配成15%的盐水,需加水多少克?"
    ]

    for question in test_cases:
        print("\n" + "=" * 80)
        analyzer = ProblemAnalyzer(question)
        analyzer.execute()
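# Verification sketch -- run separately, assuming the same Config.py values
# are importable. Lists the knowledge points linked to one stored question
# (substitute the problem_id printed by execute() above):
#
#   from py2neo import Graph
#   from Config import NEO4J_URI, NEO4J_AUTH
#
#   g = Graph(NEO4J_URI, auth=NEO4J_AUTH)
#   rows = g.run(
#       "MATCH (q:Question {id: $qid})-[:REQUIRES_KNOWLEDGE]->(kp) "
#       "RETURN kp.name AS name",
#       qid="<problem_id printed above>",
#   )
#   for row in rows:
#       print(row["name"])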