diff --git a/AI/Neo4j/N3_InputShiTi.py b/AI/Neo4j/N3_InputShiTi.py
index 5cb4500d..05108eea 100644
--- a/AI/Neo4j/N3_InputShiTi.py
+++ b/AI/Neo4j/N3_InputShiTi.py
@@ -1,194 +1,162 @@
 # -*- coding: utf-8 -*-
 """
-Math problem analysis system v5.0 (offline-capable)
+Math problem analysis system v6.1 (strict matching)
 Features:
-1. Local rule engine first, with optional LLM enhancement
-2. Automatic Neo4j data cleanup
-3. Robust error handling
-4. Detailed run logs
+1. LLM-only analysis
+2. Streaming response handling
+3. Strict matching against existing nodes
+4. Never creates new knowledge/literacy points
 """
-from Config import *
-import hashlib
-import json
 import re
+import json
+import hashlib
 from typing import Dict, List
-
-import jieba
 import requests
 from py2neo import Graph
+from Config import *
 
-# Initialize the tokenizer
-jieba.initialize()
-
 # ================== Configuration ==================
 class Config:
-    # LLM settings (disabled by default)
-    LLM_ENABLED = True  # set to True to enable the LLM
-    LLM_TIMEOUT = 8
-    LLM_MAX_RETRIES = 2
+    # Neo4j settings
+    NEO4J_URI = NEO4J_URI
+    NEO4J_AUTH = NEO4J_AUTH
 
-    # System parameters
-    MAX_CONTENT_LENGTH = 500
+    # LLM settings (the example values target Alibaba Cloud)
+    MODEL_API_URL = MODEL_API_URL
+    MODEL_API_KEY = MODEL_API_KEY
+    MODEL_NAME = MODEL_NAME
 
+    # Timeouts
+    STREAM_TIMEOUT = 30  # total timeout for the streaming request
+    CHUNK_TIMEOUT = 5    # timeout while waiting for a single data chunk
 
-# ================== Local knowledge base ==================
-class LocalKnowledgeBase:
-    """Local rule-based analysis engine"""
-
-    RULES = {
-        'arithmetic': {
-            'keywords': ['买', '卖', '元', '还剩', '单价', '总价'],
-            'knowledge': ['四则运算应用(购物问题)'],
-            'literacy': ['数感培养']
-        },
-        'travel': {
-            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
-            'knowledge': ['相遇问题解决方案'],
-            'literacy': ['空间观念']
-        },
-        'work': {
-            'keywords': ['合作', '效率', '工期', '完成', '单独'],
-            'knowledge': ['工程合作效率计算'],
-            'literacy': ['模型思想']
-        },
-        'geometry': {
-            'keywords': ['面积', '周长', '体积', '平方', '立方'],
-            'knowledge': ['几何图形面积计算'],
-            'literacy': ['空间观念']
-        },
-        'ratio': {
-            'keywords': ['百分比', '浓度', '稀释', '配比'],
-            'knowledge': ['浓度问题配比计算'],
-            'literacy': ['数据分析']
-        }
-    }
-
-    @classmethod
-    def analyze(cls, content: str) -> dict:
-        """Rule-based local analysis"""
-        result = {
-            'problem_types': [],
-            'knowledge_points': [],
-            'literacy_points': []
-        }
-
-        words = set(jieba.cut(content))
-        for ptype, config in cls.RULES.items():
-            if len(words & set(config['keywords'])) >= 2:
-                result['problem_types'].append(ptype)
-                result['knowledge_points'].extend(config['knowledge'])
-                result['literacy_points'].extend(config['literacy'])
-        return result
+    # System parameters
+    MAX_CONTENT_LENGTH = 500
 
 
-# ================== LLM client ==================
-class LLMClient:
-    """Enhanced LLM client"""
+# ================== Streaming LLM client ==================
+class StreamLLMClient:
+    """LLM client with streaming-response support"""
 
     def __init__(self):
-        self.enabled = Config.LLM_ENABLED
-        self.base_url = MODEL_API_URL
-        self.headers = {"Authorization": f"Bearer {MODEL_API_KEY}"}
+        self.base_url = Config.MODEL_API_URL
+        self.headers = {
+            "Authorization": f"Bearer {Config.MODEL_API_KEY}",
+            "Content-Type": "application/json"
+        }
 
     def analyze_problem(self, content: str) -> dict:
-        """Analysis request with retry logic"""
-        if not self.enabled:
-            return {}
-
-        for attempt in range(Config.LLM_MAX_RETRIES):
-            try:
-                response = requests.post(
-                    f"{self.base_url}/chat/completions",
-                    headers=self.headers,
-                    json={
-                        "model": MODEL_NAME,
-                        "messages": [{
-                            "role": "user",
-                            "content": f"Analyze this math problem: {content}"
-                        }],
-                        "temperature": 0.3
-                    },
-                    timeout=Config.LLM_TIMEOUT
-                )
-                response.raise_for_status()
-                return self._parse_response(response.json())
-            except requests.exceptions.RequestException as e:
-                print(f"🌐 Network error (attempt {attempt + 1}/{Config.LLM_MAX_RETRIES}): {str(e)}")
-        return {}
-
-    def _parse_response(self, response: dict) -> dict:
-        """Parse the LLM response"""
+        """Analysis over a streaming response"""
         try:
-            content = response['choices'][0]['message']['content']
-            return json.loads(re.search(r'\{.*\}', content,
-                                        re.DOTALL).group())
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                headers=self.headers,
+                json={
+                    "model": Config.MODEL_NAME,
+                    "messages": [{
+                        "role": "user",
+                        "content": f"""Analyze the math problem and reply strictly in this JSON format:
+{{
+  "problem_types": ["list of problem types"],
+  "knowledge_points": ["knowledge point names (must match the database exactly)"],
+  "literacy_points": ["literacy point names (must match the database exactly)"]
+}}
+Problem: {content}"""
+                    }],
+                    "temperature": 0.2,
+                    "stream": True
+                },
+                timeout=Config.STREAM_TIMEOUT,
+                stream=True
+            )
+            response.raise_for_status()
+
+            return self._process_stream(response)
+
+        except requests.exceptions.RequestException as e:
+            print(f"🌐 Network error: {str(e)}")
+            return {}
         except Exception as e:
             print(f"🔴 Parse failure: {str(e)}")
             return {}
 
+    def _process_stream(self, response) -> dict:
+        """Process the streamed response"""
+        full_content = ""
+        for chunk in response.iter_lines():
+            if chunk:
+                decoded_chunk = chunk.decode('utf-8')
+                if decoded_chunk.startswith("data:"):
+                    try:
+                        chunk_data = json.loads(decoded_chunk[5:])
+                        content = chunk_data['choices'][0]['delta'].get('content', '')
+                        full_content += content
+                    except (json.JSONDecodeError, KeyError, IndexError):
+                        continue  # malformed chunk or the [DONE] sentinel
+
+        try:
+            json_str = re.search(r'\{.*\}', full_content, re.DOTALL).group()
+            return json.loads(json_str)
+        except (AttributeError, json.JSONDecodeError):
+            print("🔴 Could not parse the model output")
+            return {}
+
 
 # ================== Knowledge graph management ==================
 class KnowledgeManager:
-    """Enhanced knowledge graph manager"""
+    """Strict-matching knowledge graph manager"""
 
     def __init__(self):
-        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
-        self._clean_data()
+        self.graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
         self.knowledge_map = self._load_knowledge()
         self.literacy_map = self._load_literacy()
 
-    def _clean_data(self):
-        """Automatic data cleanup"""
-        self.graph.run("""
-            MATCH (n)
-            WHERE n.name CONTAINS '测试' OR n.id IS NULL
-            DETACH DELETE n
-        """)
-
     def _load_knowledge(self) -> Dict[str, str]:
-        """Load knowledge points"""
+        """Load the knowledge point map (id -> name)"""
         return {rec['n.id']: rec['n.name']
                 for rec in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}
 
     def _load_literacy(self) -> Dict[str, str]:
-        """Load literacy points"""
+        """Load the literacy point map (value -> title)"""
         return {rec['n.value']: rec['n.title']
                 for rec in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
 
     def store_analysis(self, question_id: str, content: str,
                        knowledge: List[str], literacy: List[str]):
-        """Transactional storage"""
+        """Parameterized queries solve the escaping problem"""
         tx = self.graph.begin()
         try:
-            # Escape special characters
-            safe_content = content.replace("'", "\\'")
-
-            # Create/update the question node
-            tx.run(f"""
-                MERGE (q:Question {{id: '{question_id}'}})
-                SET q.content = '{safe_content}'
-            """)
-
-            # Link knowledge points
+            # Parameterized query avoids escaping issues entirely
+            tx.run(
+                "MERGE (q:Question {id: $question_id}) "
+                "SET q.content = $content",
+                {
+                    "question_id": question_id,
+                    "content": content
+                }
+            )
+
+            # Link knowledge points (parameterized)
             for kp_name in knowledge:
                 if kp_id := next((k for k, v in self.knowledge_map.items()
                                   if v == kp_name), None):
-                    tx.run(f"""
-                        MERGE (kp:KnowledgePoint {{id: '{kp_id}'}})
-                        WITH kp
-                        MATCH (q:Question {{id: '{question_id}'}})
-                        MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)
-                    """)
-
-            # Link literacy points
+                    tx.run(
+                        "MATCH (kp:KnowledgePoint {id: $kp_id}) "
+                        "MATCH (q:Question {id: $qid}) "
+                        "MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)",
+                        {"kp_id": kp_id, "qid": question_id}
+                    )
+
+            # Link literacy points (parameterized)
             for lit_name in literacy:
                 if lit_id := next((k for k, v in self.literacy_map.items()
                                    if v == lit_name), None):
-                    tx.run(f"""
-                        MERGE (lp:LiteracyNode {{value: '{lit_id}'}})
-                        WITH lp
-                        MATCH (q:Question {{id: '{question_id}'}})
-                        MERGE (q)-[:DEVELOPS_LITERACY]->(lp)
-                    """)
+                    tx.run(
+                        "MATCH (lp:LiteracyNode {value: $lit_id}) "
+                        "MATCH (q:Question {id: $qid}) "
+                        "MERGE (q)-[:DEVELOPS_LITERACY]->(lp)",
+                        {"lit_id": lit_id, "qid": question_id}
+                    )
 
             self.graph.commit(tx)
             print("✅ Data stored successfully")
@@ -199,42 +167,27 @@ class KnowledgeManager:
 
 # ================== Core analysis engine ==================
 class ProblemAnalyzer:
-    """Problem analysis processor"""
+    """LLM-only analysis engine"""
 
     def __init__(self, content: str):
        self.original = content
        self.content = self._preprocess(content)
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
-        self.llm = LLMClient()
+        self.llm = StreamLLMClient()
 
     def _preprocess(self, text: str) -> str:
         """Text preprocessing"""
         return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]
 
     def analyze(self) -> dict:
-        """Run the analysis pipeline"""
-        # Rule-based local analysis
-        local_result = LocalKnowledgeBase.analyze(self.content)
-
-        # LLM analysis
-        llm_result = self.llm.analyze_problem(self.original)
-
-        # Merge the results
+        """LLM-only analysis"""
+        result = self.llm.analyze_problem(self.original)
         return {
             "problem_id": self.question_id,
-            "problem_types": list(set(
-                local_result.get('problem_types', []) +
-                llm_result.get('problem_types', [])
-            ))[:3],
-            "knowledge_points": list(set(
-                local_result.get('knowledge_points', []) +
-                llm_result.get('knowledge_points', [])
-            ))[:2],
-            "literacy_points": list(set(
-                local_result.get('literacy_points', []) +
-                llm_result.get('literacy_points', [])
-            ))[:2]
+            "problem_types": result.get('problem_types', [])[:3],
+            "knowledge_points": result.get('knowledge_points', [])[:2],
+            "literacy_points": result.get('literacy_points', [])[:2]
         }
 
     def execute(self):
@@ -243,7 +196,7 @@ class ProblemAnalyzer:
 
         analysis = self.analyze()
 
-        print("\n📊 Analysis report:")
+        print("\n📊 LLM analysis report:")
         print(f"   Problem types: {analysis.get('problem_types', [])}")
         print(f"   Suggested knowledge points: {analysis.get('knowledge_points', [])}")
         print(f"   Linked literacy points: {analysis.get('literacy_points', [])}")
@@ -261,14 +214,11 @@
 if __name__ == '__main__':
     test_cases = [
         "小明用50元买了3本笔记本,每本8元,还剩多少钱?",
-        "甲乙两车相距300公里,甲车速度60km/h,乙车40km/h,几小时后相遇?",
-        "一项工程甲队单独做需要10天,乙队需要15天,两队合作需要多少天?",
-        "一个长方形长8cm,宽5cm,求面积和周长",
-        "含盐20%的盐水500克,要配成15%的盐水,需加水多少克?"
+        "甲乙两车相距300公里,甲车速度60km/h,乙车40km/h,几小时后相遇?"
     ]
 
     for question in test_cases:
         print("\n" + "=" * 80)
         print(f"📚 Processing problem: {question}")
         analyzer = ProblemAnalyzer(question)
-        analyzer.execute()
\ No newline at end of file
+        analyzer.execute()