# -*- coding: utf-8 -*-
"""Math problem analysis system v5.0 (works offline).

Features:
1. Local keyword-rule engine as the primary analyzer, with optional
   LLM-based enhancement.
2. Automatic Neo4j test-data cleanup on startup.
3. Defensive error handling around network and database calls.
4. Progress logging to stdout.
"""

import hashlib
import json
import re
from typing import Dict, List

import jieba
import requests
from py2neo import Graph

from Config import *

# Warm the tokenizer once at import time so the first cut() call is fast.
jieba.initialize()


# ================== Configuration ==================
class Config:
    """Runtime switches.

    NOTE(review): this class shadows the ``Config`` module imported above
    with ``from Config import *`` — consider renaming one of them.
    """

    LLM_ENABLED = True   # set True to enable the optional LLM analysis
    LLM_TIMEOUT = 10     # seconds before an LLM HTTP request is aborted

    # Maximum characters of problem text kept after preprocessing.
    MAX_CONTENT_LENGTH = 500


# ================== Knowledge base ==================
class LocalKnowledgeBase:
    """Local keyword-rule knowledge base.

    Each rule maps a problem type to trigger keywords plus the knowledge
    and literacy points it implies. All rule strings are runtime data and
    must match the node names stored in Neo4j.
    """

    RULES = {
        'arithmetic': {
            'name': '四则运算',
            'keywords': ['买', '卖', '元', '还剩', '单价', '总价'],
            'knowledge': ['四则运算应用(购物问题)'],
            'literacy': ['数感培养']
        },
        'travel': {
            'name': '行程问题',
            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
            'knowledge': ['相遇问题解决方案'],
            'literacy': ['空间观念']
        },
        'work': {
            'name': '工程问题',
            'keywords': ['合作', '效率', '工期', '完成', '单独'],
            'knowledge': ['工程合作效率计算'],
            'literacy': ['模型思想']
        },
        'geometry': {
            'name': '几何问题',
            'keywords': ['面积', '周长', '体积', '平方', '立方'],
            'knowledge': ['几何图形面积计算'],
            'literacy': ['空间观念']
        },
        'ratio': {
            'name': '比例问题',
            'keywords': ['百分比', '浓度', '稀释', '配比'],
            'knowledge': ['浓度问题配比计算'],
            'literacy': ['数据分析']
        }
    }

    @classmethod
    def analyze(cls, content: str) -> dict:
        """Classify *content* by keyword overlap with each rule.

        A rule matches when at least two of its keywords occur in the
        jieba-segmented text. Returns a dict with ``problem_types``,
        ``knowledge_points`` and ``literacy_points`` lists (possibly empty).
        """
        result = {
            'problem_types': [],
            'knowledge_points': [],
            'literacy_points': []
        }

        words = set(jieba.cut(content))
        for ptype, rule in cls.RULES.items():
            # Require >= 2 keyword hits to reduce false positives on
            # common single words such as '元' or '时间'.
            if len(words & set(rule['keywords'])) >= 2:
                result['problem_types'].append(ptype)
                result['knowledge_points'].extend(rule['knowledge'])
                result['literacy_points'].extend(rule['literacy'])

        return result


# ================== LLM module ==================
class LLMClient:
    """Optional large-language-model client (OpenAI-compatible HTTP API)."""

    def __init__(self):
        # Disabled entirely when Config.LLM_ENABLED is False — every call
        # then short-circuits to an empty result.
        self.enabled = Config.LLM_ENABLED
        self.base_url = MODEL_API_URL  # provided by `from Config import *`
        self.headers = {
            "Authorization": f"Bearer {MODEL_API_KEY}",
            "Content-Type": "application/json"
        }

    def analyze_problem(self, content: str) -> dict:
        """Ask the LLM to analyze *content*.

        Returns a dict in the same shape as LocalKnowledgeBase.analyze(),
        or {} when the client is disabled or any network/parse step fails
        (failures are non-fatal: the local rule engine still runs).
        """
        if not self.enabled:
            return {}

        payload = {
            "model": MODEL_NAME,
            "messages": [{
                "role": "user",
                "content": f"分析数学题目:{content}"
            }],
            "temperature": 0.3,
            "max_tokens": 300
        }
        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=Config.LLM_TIMEOUT
            )
            response.raise_for_status()
            return self._parse_response(response.json())
        except Exception as e:
            # Best-effort enhancement only — log and fall back to {}.
            print(f"⚠️ 大模型分析失败: {str(e)}")
            return {}

    def _parse_response(self, data: dict) -> dict:
        """Extract and JSON-decode the model reply; {} when malformed.

        NOTE(review): this assumes the model replies with a JSON object,
        but the prompt above never asks for JSON, so this path will often
        return {} — confirm the intended prompt contract.
        """
        try:
            content = data['choices'][0]['message']['content']
            return json.loads(content)
        except (KeyError, IndexError, TypeError, ValueError):
            # ValueError also covers json.JSONDecodeError.
            return {}


# ================== Knowledge graph module ==================
class KnowledgeManager:
    """Thin wrapper around the Neo4j knowledge graph."""

    def __init__(self):
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
        self._clean_data()
        # id -> name for knowledge points, value -> title for literacy points.
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()

    def _clean_data(self):
        """Remove leftover test data.

        Bug fix: the previous predicate ``n.id IS NULL`` matched every node
        keyed by a different property (LiteracyNode uses ``value``) and
        deleted the whole literacy taxonomy; the null-id check now applies
        to KnowledgePoint nodes only.
        """
        self.graph.run("""
            MATCH (n)
            WHERE n.name CONTAINS '测试' OR (n:KnowledgePoint AND n.id IS NULL)
            DETACH DELETE n
        """)

    def _load_knowledge(self) -> Dict[str, str]:
        """Load knowledge points as an id -> name map."""
        result = self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")
        return {rec['n.id']: rec['n.name'] for rec in result}

    def _load_literacy(self) -> Dict[str, str]:
        """Load literacy points as a value -> title map."""
        result = self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")
        return {rec['n.value']: rec['n.title'] for rec in result}

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
        """Persist one question node and its knowledge/literacy relations.

        Fixes over the previous version:
        * Cypher is parameterized — the f-string version broke on quotes
          inside *content* and was injectable;
        * the old statements used ``q`` in a WITH before any MATCH bound
          it (a Cypher error), so relations were never created;
        * ``content`` is SET instead of being part of the MERGE key, so
          re-analyzing a question cannot create duplicate nodes.
        """
        try:
            # Create/refresh the question node.
            self.graph.run(
                "MERGE (q:Question {id: $qid}) SET q.content = $content",
                qid=question_id, content=content,
            )

            # Link knowledge points (reverse-lookup name -> id).
            for kp_name in knowledge:
                kp_id = next((k for k, v in self.knowledge_map.items() if v == kp_name), None)
                if kp_id:
                    self.graph.run(
                        """
                        MATCH (q:Question {id: $qid})
                        MERGE (kp:KnowledgePoint {id: $kid})
                        MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)
                        """,
                        qid=question_id, kid=kp_id,
                    )

            # Link literacy points (reverse-lookup title -> value).
            for lit_name in literacy:
                lit_id = next((k for k, v in self.literacy_map.items() if v == lit_name), None)
                if lit_id:
                    self.graph.run(
                        """
                        MATCH (q:Question {id: $qid})
                        MERGE (lp:LiteracyNode {value: $lid})
                        MERGE (q)-[:DEVELOPS_LITERACY]->(lp)
                        """,
                        qid=question_id, lid=lit_id,
                    )
        except Exception as e:
            print(f"❌ 存储失败: {str(e)}")


# ================== Core logic module ==================
class ProblemAnalyzer:
    """End-to-end analysis pipeline for a single problem statement."""

    def __init__(self, content: str):
        self.original = content
        self.content = self._preprocess(content)
        # Stable 12-hex-digit id derived from the raw (unpreprocessed) text.
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        # NOTE(review): a fresh Neo4j connection per question — consider
        # sharing one KnowledgeManager across analyzers.
        self.kg = KnowledgeManager()
        self.llm = LLMClient()

    def _preprocess(self, text: str) -> str:
        """Strip everything but word chars / CJK and truncate to the limit."""
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]

    @staticmethod
    def _merge_unique(first: List[str], second: List[str], limit: int) -> List[str]:
        """Deduplicate preserving order, then truncate to *limit*.

        Bug fix: ``list(set(...))[:k]`` truncated an *unordered* set, so
        the reported points varied nondeterministically between runs.
        Local-rule results now always take precedence over LLM extras.
        """
        return list(dict.fromkeys(first + second))[:limit]

    def analyze(self) -> dict:
        """Run local rules plus the optional LLM and merge both results."""
        local_result = LocalKnowledgeBase.analyze(self.content)
        llm_result = self.llm.analyze_problem(self.original)

        return {
            "problem_id": self.question_id,
            "problem_types": self._merge_unique(
                local_result.get('problem_types', []),
                llm_result.get('problem_types', []), 3),
            "knowledge_points": self._merge_unique(
                local_result.get('knowledge_points', []),
                llm_result.get('knowledge_points', []), 2),
            "literacy_points": self._merge_unique(
                local_result.get('literacy_points', []),
                llm_result.get('literacy_points', []), 2),
        }

    def execute(self):
        """Analyze, print a report to stdout, and persist the result."""
        print(f"\n🔍 开始分析题目:{self.original[:50]}...")

        analysis = self.analyze()

        print("\n📊 分析报告:")
        print(f" 题型识别:{analysis.get('problem_types', [])}")
        print(f" 推荐知识点:{analysis.get('knowledge_points', [])}")
        print(f" 关联素养点:{analysis.get('literacy_points', [])}")

        # Store to the knowledge graph (failures are logged, not raised).
        self.kg.store_analysis(
            question_id=analysis['problem_id'],
            content=self.content,
            knowledge=analysis.get('knowledge_points', []),
            literacy=analysis.get('literacy_points', [])
        )
        print("✅ 数据存储完成")


# ================== Self-test ==================
if __name__ == '__main__':
    test_cases = [
        "小明用50元买了3本笔记本,每本8元,还剩多少钱?",
        "甲乙两车相距300公里,甲车速度60km/h,乙车40km/h,几小时后相遇?",
        "一项工程甲队单独做需要10天,乙队需要15天,两队合作需要多少天?",
        "一个长方形长8cm,宽5cm,求面积和周长",
        "含盐20%的盐水500克,要配成15%的盐水,需加水多少克?"
    ]

    for question in test_cases:
        print("\n" + "=" * 80)
        analyzer = ProblemAnalyzer(question)
        analyzer.execute()