# -*- coding: utf-8 -*-
"""
Math Problem Analysis System v5.0 (offline-capable edition)

Features:
1. Local rule engine first, with optional LLM enhancement
2. Automatic Neo4j data cleaning
3. Robust error handling
4. Detailed runtime logging
"""
import hashlib
import json
import re
from typing import Dict, List

import jieba
import requests
from openai import OpenAI
from py2neo import Graph

# NEO4J_URI, NEO4J_AUTH, MODEL_API_URL, MODEL_API_KEY and MODEL_NAME
# come from the local Config module; the Config class below only holds
# runtime switches and does not conflict with this star import.
from Config import *

# Initialize the tokenizer up front so the first analysis call is fast
jieba.initialize()
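
# The Config module is assumed to provide values along these lines
# (hypothetical placeholders, not the real deployment settings):
#   NEO4J_URI = "bolt://localhost:7687"
#   NEO4J_AUTH = ("neo4j", "password")
#   MODEL_API_URL = "https://api.example.com/v1"
#   MODEL_API_KEY = "sk-..."
#   MODEL_NAME = "your-model-name"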

# ================== Configuration ==================
class Config:
    LLM_ENABLED = True   # set to True to enable the LLM pass
    LLM_TIMEOUT = 10     # seconds

    # System parameters
    MAX_CONTENT_LENGTH = 500


# Split the exam file into questions
def split_questions(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Match numbered question blocks (answers included): from "N. 【...】"
    # up to the next numbered block or end of file
    pattern = r'(\d+\.\s+【.*?】.*?(?=\n\d+\.|\Z))'
    questions = re.findall(pattern, content, re.DOTALL)

    # Strip surrounding whitespace from every block
    cleaned_questions = [q.strip() for q in questions]

    return cleaned_questions[:10]  # return at most the first 10 questions
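
# A sketch of the input this expects (hypothetical ShiTi.md contents):
#   1. 【四则运算】小明有50元,买了3本笔记本……
#      答案:26元
#   2. 【行程问题】甲乙两车相距300公里……
# split_questions('ShiTi.md') then returns each numbered block, answer
# text included, as one stripped string.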

# ================== Knowledge base module ==================
class LocalKnowledgeBase:
    """Local rule-based knowledge base."""

    # Each rule maps a problem type to its trigger keywords and to the
    # knowledge/literacy points it implies. The keywords and point names
    # stay in Chinese: keywords are matched against jieba tokens of the
    # (Chinese) question text, and the point names must match node names
    # in the Neo4j graph.
    RULES = {
        'arithmetic': {
            'name': '四则运算',
            'keywords': ['买', '卖', '元', '还剩', '单价', '总价'],
            'knowledge': ['四则运算应用(购物问题)'],
            'literacy': ['数感培养']
        },
        'travel': {
            'name': '行程问题',
            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
            'knowledge': ['相遇问题解决方案'],
            'literacy': ['空间观念']
        },
        'work': {
            'name': '工程问题',
            'keywords': ['合作', '效率', '工期', '完成', '单独'],
            'knowledge': ['工程合作效率计算'],
            'literacy': ['模型思想']
        },
        'geometry': {
            'name': '几何问题',
            'keywords': ['面积', '周长', '体积', '平方', '立方'],
            'knowledge': ['几何图形面积计算'],
            'literacy': ['空间观念']
        },
        'ratio': {
            'name': '比例问题',
            'keywords': ['百分比', '浓度', '稀释', '配比'],
            'knowledge': ['浓度问题配比计算'],
            'literacy': ['数据分析']
        }
    }

    @classmethod
    def analyze(cls, content: str) -> dict:
        """Local rule-based analysis."""
        result = {
            'problem_types': [],
            'knowledge_points': [],
            'literacy_points': []
        }

        words = set(jieba.cut(content))
        for ptype, config in cls.RULES.items():
            matches = words & set(config['keywords'])
            # Require at least two keyword hits to limit false positives
            if len(matches) >= 2:
                result['problem_types'].append(ptype)
                result['knowledge_points'].extend(config['knowledge'])
                result['literacy_points'].extend(config['literacy'])

        return result
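
    # A quick sketch of the rule engine on the first test case below:
    #   LocalKnowledgeBase.analyze("小明用50元买了3本笔记本,每本8元,还剩多少钱?")
    # jieba's tokens should include 买 / 元 / 还剩 (three 'arithmetic'
    # keywords), so the expected result is roughly:
    #   {'problem_types': ['arithmetic'],
    #    'knowledge_points': ['四则运算应用(购物问题)'],
    #    'literacy_points': ['数感培养']}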

# ================== LLM module ==================
class LLMClient:
    """Client for an optional LLM analysis service."""

    def __init__(self):
        self.enabled = Config.LLM_ENABLED
        self.base_url = MODEL_API_URL
        self.headers = {
            "Authorization": f"Bearer {MODEL_API_KEY}",
            "Content-Type": "application/json"
        }

    def analyze_problem(self, content: str) -> dict:
        """LLM analysis (optional enhancement)."""
        if not self.enabled:
            return {}
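
        # Note: this client calls the /chat/completions endpoint directly
        # via requests, while KnowledgeGraph below uses the openai SDK
        # against the same MODEL_API_URL; only this path applies
        # Config.LLM_TIMEOUT.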

        try:
            payload = {
                "model": MODEL_NAME,
                "messages": [{
                    "role": "user",
                    "content": f"分析数学题目:{content}"
                }],
                "temperature": 0.3,
                "max_tokens": 300
            }
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=Config.LLM_TIMEOUT
            )
            response.raise_for_status()
            return self._parse_response(response.json())
        except Exception as e:
            print(f"⚠️ LLM analysis failed: {str(e)}")
            return {}

    def _parse_response(self, data: dict) -> dict:
        """Parse the LLM response body."""
        try:
            content = data['choices'][0]['message']['content']
            return json.loads(content)
        except (KeyError, IndexError, TypeError, json.JSONDecodeError):
            return {}
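
    # _parse_response assumes the model replies with a bare JSON object,
    # for example (hypothetical reply):
    #   {"problem_types": ["travel"],
    #    "knowledge_points": ["相遇问题解决方案"],
    #    "literacy_points": ["空间观念"]}
    # Anything else (prose, code fences, malformed JSON) degrades to {}.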

# ================== Question-to-graph linking module ==================
class KnowledgeGraph:
    """Links a single question to graph nodes via LLM-generated Cypher."""

    def __init__(self, content: str):
        self.content = content
        self.question_id = hashlib.md5(content.encode()).hexdigest()[:8]
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)

        # Load both data sources
        self.knowledge_points = self._get_knowledge_points()
        self.literacy_points = self._get_literacy_points()
        print(f"Loaded {len(self.knowledge_points)} knowledge points "
              f"and {len(self.literacy_points)} literacy points")

        self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)

    def _get_knowledge_points(self) -> dict:
        try:
            return {row['n.id']: row['n.name']
                    for row in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}
        except Exception as e:
            print(f"Failed to load knowledge points: {str(e)}")
            return {}

    def _get_literacy_points(self) -> dict:
        try:
            return {row['n.value']: row['n.title']
                    for row in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
        except Exception as e:
            print(f"Failed to load literacy points: {str(e)}")
            return {}

    def _make_prompt(self) -> str:
        # Show the model a small sample of valid IDs plus the totals
        kp_samples = "\n".join([f"• {k}: {v}" for k, v in list(self.knowledge_points.items())[:3]])
        lp_samples = "\n".join([f"• {k}: {v}" for k, v in list(self.literacy_points.items())[:3]])

        return f"""请分析题目考查的知识点和核心素养:

可用知识点(ID:名称):
{kp_samples}
...共{len(self.knowledge_points)}个知识点

可用素养点(ID:名称):
{lp_samples}
...共{len(self.literacy_points)}个素养点

生成要求:
1. 必须使用上述ID
2. 按以下格式生成Cypher代码:

MERGE (q:Question {{id: "{self.question_id}"}})
SET q.content = "题目内容"
WITH q
MATCH (kp:KnowledgePoint {{id: "知识点ID"}})
MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)
WITH q
MATCH (lp:LiteracyNode {{value: "素养点ID"}})
MERGE (q)-[:RELATES_TO_LITERACY]->(lp)"""

    def _clean_cypher(self, code: str) -> str:
        """Keep only statements whose IDs actually exist in the graph."""
        valid_kp_ids = [k.upper() for k in self.knowledge_points.keys()]
        valid_lp_ids = [k.upper() for k in self.literacy_points.keys()]

        cleaned = []
        lines = [line.strip() for line in code.split('\n') if line.strip()]

        for line in lines:
            # Knowledge-point MATCH statements
            if 'MATCH (kp:KnowledgePoint' in line:
                if match := re.search(r'id: ["\'](.*?)["\']', line):
                    kp_id = match.group(1).upper()
                    if kp_id in valid_kp_ids:
                        cleaned.append(line.replace(match.group(1), kp_id))

            # Literacy-point MATCH statements
            elif 'MATCH (lp:LiteracyNode' in line:
                if match := re.search(r'value: ["\'](.*?)["\']', line):
                    lp_id = match.group(1).upper()
                    if lp_id in valid_lp_ids:
                        cleaned.append(line.replace(match.group(1), lp_id))

            # Keep the remaining expected statement types
            elif line.startswith(('MERGE', 'WITH', 'SET')):
                cleaned.append(line)

        return '\n'.join(cleaned)
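
    # A sketch of the filtering, assuming "G001" is a valid KnowledgePoint
    # id and "BAD" is not:
    #   MATCH (kp:KnowledgePoint {id: "g001"})  -> kept, id upcased
    #   MATCH (kp:KnowledgePoint {id: "BAD"})   -> dropped
    #   MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)      -> kept
    # Dropping a MATCH while keeping its MERGE can still leave invalid
    # Cypher, so callers should treat the output as best-effort.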

    def run(self) -> str:
        """Ask the LLM for Cypher and return the sanitized result."""
        try:
            response = self.client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": self._make_prompt()},
                    {"role": "user", "content": f"题目内容:{self.content}"}
                ]
            )
            return self._clean_cypher(response.choices[0].message.content)
        except Exception as e:
            print(f"Analysis failed: {str(e)}")
            return ""

    def query_relations(self):
        """Return the knowledge/literacy nodes linked to this question."""
        cypher = f"""
        MATCH (q:Question {{id: "{self.question_id}"}})
        OPTIONAL MATCH (q)-[:TESTS_KNOWLEDGE]->(kp)
        OPTIONAL MATCH (q)-[:RELATES_TO_LITERACY]->(lp)
        RETURN
            kp.id AS knowledge_id,
            kp.name AS knowledge_name,
            lp.value AS literacy_id,
            lp.title AS literacy_title"""
        return self.graph.run(cypher).data()
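
    # run() returning "" on failure pairs with the walrus check in the
    # __main__ block below: `if cypher := kg.run():` skips both execution
    # and the relation query whenever generation failed or every
    # generated ID was filtered out.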

# ================== Knowledge graph module ==================
class KnowledgeManager:
    """Knowledge graph manager."""

    def __init__(self):
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
        self._clean_data()
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()

    def _clean_data(self):
        """Data cleaning: drop test nodes and malformed knowledge points."""
        # The IS NULL test is guarded by the label check: LiteracyNode
        # keys on `value` rather than `id`, so a bare `n.id IS NULL`
        # would wipe every literacy node.
        self.graph.run("""
            MATCH (n)
            WHERE n.name CONTAINS '测试'
               OR (n:KnowledgePoint AND n.id IS NULL)
            DETACH DELETE n
        """)

    def _load_knowledge(self) -> Dict[str, str]:
        """Load knowledge points."""
        result = self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")
        return {rec['n.id']: rec['n.name'] for rec in result}

    def _load_literacy(self) -> Dict[str, str]:
        """Load literacy points."""
        result = self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")
        return {rec['n.value']: rec['n.title'] for rec in result}
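
    # Both maps are keyed by graph ID and valued by display name, e.g.
    # (hypothetical data) knowledge_map == {'K001': '四则运算应用(购物问题)'}
    # and literacy_map == {'L001': '数感培养'}; store_analysis below does a
    # reverse lookup from name back to ID.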

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
        """Persist one analysis result."""
        try:
            # Create the question node; parameterized so that quotes and
            # other special characters in the content cannot break the query
            self.graph.run(
                "MERGE (q:Question {id: $id}) SET q.content = $content",
                id=question_id, content=content
            )

            # Link knowledge points
            for kp_name in knowledge:
                kp_id = next((k for k, v in self.knowledge_map.items() if v == kp_name), None)
                if kp_id:
                    self.graph.run("""
                        MATCH (q:Question {id: $qid})
                        MERGE (kp:KnowledgePoint {id: $kpid})
                        MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)
                    """, qid=question_id, kpid=kp_id)

            # Link literacy points
            for lit_name in literacy:
                lit_id = next((k for k, v in self.literacy_map.items() if v == lit_name), None)
                if lit_id:
                    self.graph.run("""
                        MATCH (q:Question {id: $qid})
                        MERGE (lp:LiteracyNode {value: $lid})
                        MERGE (q)-[:DEVELOPS_LITERACY]->(lp)
                    """, qid=question_id, lid=lit_id)
        except Exception as e:
            print(f"❌ Storage failed: {str(e)}")
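
    # Usage sketch (IDs and names are the hypothetical data above):
    #   km = KnowledgeManager()
    #   km.store_analysis("1a2b3c4d5e6f", "小明用50元……",
    #                     knowledge=["四则运算应用(购物问题)"],
    #                     literacy=["数感培养"])
    # creates or updates one Question node and MERGEs both relationship
    # types onto the matched KnowledgePoint/LiteracyNode nodes.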

# ================== Core logic module ==================
class ProblemAnalyzer:
    """Question analysis engine."""

    def __init__(self, content: str):
        self.original = content
        self.content = self._preprocess(content)
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
        self.llm = LLMClient()

    def _preprocess(self, text: str) -> str:
        """Text preprocessing: strip punctuation and cap the length."""
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]
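
    # Sketch of the preprocessing on the first test case:
    #   _preprocess("小明用50元买了3本笔记本,每本8元,还剩多少钱?")
    # removes everything outside \w and the CJK range (here the commas
    # and question mark), giving "小明用50元买了3本笔记本每本8元还剩多少钱",
    # truncated to Config.MAX_CONTENT_LENGTH characters.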

    def analyze(self) -> dict:
        """Run the analysis pipeline."""
        # Local rule-based analysis
        local_result = LocalKnowledgeBase.analyze(self.content)

        # Optional LLM enhancement
        llm_result = self.llm.analyze_problem(self.original)

        # Merge the two result sets
        return {
            "problem_id": self.question_id,
            "problem_types": list(set(
                local_result.get('problem_types', []) +
                llm_result.get('problem_types', [])
            ))[:3],
            "knowledge_points": list(set(
                local_result.get('knowledge_points', []) +
                llm_result.get('knowledge_points', [])
            ))[:2],
            "literacy_points": list(set(
                local_result.get('literacy_points', []) +
                llm_result.get('literacy_points', [])
            ))[:2]
        }
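
    # Shape of the merged result (values illustrative):
    #   {'problem_id': '1a2b3c4d5e6f',
    #    'problem_types': ['arithmetic'],
    #    'knowledge_points': ['四则运算应用(购物问题)'],
    #    'literacy_points': ['数感培养']}
    # Since set() is unordered, the [:3]/[:2] caps keep an arbitrary
    # subset whenever the merged lists exceed them.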

    def execute(self):
        """Run the full pipeline for one question."""
        print(f"\n🔍 Analyzing question: {self.original[:50]}...")

        analysis = self.analyze()

        print("\n📊 Analysis report:")
        print(f"  Problem types:    {analysis.get('problem_types', [])}")
        print(f"  Knowledge points: {analysis.get('knowledge_points', [])}")
        print(f"  Literacy points:  {analysis.get('literacy_points', [])}")

        # Persist to the knowledge graph
        self.kg.store_analysis(
            question_id=analysis['problem_id'],
            content=self.content,
            knowledge=analysis.get('knowledge_points', []),
            literacy=analysis.get('literacy_points', [])
        )
        print("✅ Data stored")

# ================== Test cases ==================
if __name__ == '__main__':
    # Flow 1: split the question file and link each block through
    # LLM-generated Cypher
    question_blocks = split_questions('ShiTi.md')

    # Verify the split results
    for i, block in enumerate(question_blocks, 1):
        print(f"Question block {i}:")
        print("-" * 50)
        kg = KnowledgeGraph(block)

        if cypher := kg.run():
            print("Generated Cypher:\n", cypher)
            kg.graph.run(cypher)
            print("Linked relations:")
            for record in kg.query_relations():
                print(f"Knowledge point: {record['knowledge_name']} ({record['knowledge_id']})")
                print(f"Literacy point: {record['literacy_title']} ({record['literacy_id']})")

    # Flow 2: rule engine + optional LLM pipeline on in-memory test cases
    test_cases = [
        "小明用50元买了3本笔记本,每本8元,还剩多少钱?",
        "甲乙两车相距300公里,甲车速度60km/h,乙车40km/h,几小时后相遇?",
        "一项工程甲队单独做需要10天,乙队需要15天,两队合作需要多少天?",
        "一个长方形长8cm,宽5cm,求面积和周长",
        "含盐20%的盐水500克,要配成15%的盐水,需加水多少克?"
    ]

    for question in test_cases:
        print("\n" + "=" * 80)
        analyzer = ProblemAnalyzer(question)
        analyzer.execute()