# QingLong/AI/Neo4j/N3_InputShiTi.py

# -*- coding: utf-8 -*-
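"""
Math word-problem analysis system v5.0 (offline-capable edition).

Features:
1. Local rule engine first, with optional large-language-model enhancement
2. Automatic Neo4j data cleaning
3. Comprehensive error handling
4. Detailed run logging
"""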
from Config import *

import hashlib
import json
import re
from typing import Dict, List

import jieba
import requests
from py2neo import Graph
from Config import *

# Initialise the jieba tokenizer once at import time.
jieba.initialize()


# ================== 配置区 ==================
class Config:
    """Script-level tunables for the LLM client and text preprocessing."""

    # --- Large-language-model settings ---
    # Currently switched ON; set to False to run with the local rules only.
    LLM_ENABLED: bool = True
    # Passed as the `timeout` argument of the HTTP request to the model.
    LLM_TIMEOUT: int = 8
    # Total number of request attempts made before giving up.
    LLM_MAX_RETRIES: int = 2

    # --- General settings ---
    # Preprocessed problem text is cut to at most this many characters.
    MAX_CONTENT_LENGTH: int = 500


# ================== 本地知识库 ==================
class LocalKnowledgeBase:
    """Keyword-driven classifier that works without any network access.

    ``RULES`` maps a problem-type name to its trigger keywords and to the
    knowledge / literacy labels reported when that type is detected.
    """

    RULES = {
        'arithmetic': {
            'keywords': ['买', '卖', '元', '还剩', '单价', '总价'],
            'knowledge': ['四则运算应用（购物问题）'],
            'literacy': ['数感培养'],
        },
        'travel': {
            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
            'knowledge': ['相遇问题解决方案'],
            'literacy': ['空间观念'],
        },
        'work': {
            'keywords': ['合作', '效率', '工期', '完成', '单独'],
            'knowledge': ['工程合作效率计算'],
            'literacy': ['模型思想'],
        },
        'geometry': {
            'keywords': ['面积', '周长', '体积', '平方', '立方'],
            'knowledge': ['几何图形面积计算'],
            'literacy': ['空间观念'],
        },
        'ratio': {
            'keywords': ['百分比', '浓度', '稀释', '配比'],
            'knowledge': ['浓度问题配比计算'],
            'literacy': ['数据分析'],
        },
    }

    @classmethod
    def analyze(cls, content: str) -> dict:
        """Classify *content* against ``RULES``.

        The text is tokenised with jieba; a rule fires when at least two of
        its keywords appear among the distinct tokens.

        Returns:
            A dict with the keys ``problem_types``, ``knowledge_points`` and
            ``literacy_points``; each value is a list filled in rule order
            (labels shared by several matching rules may repeat).
        """
        tokens = set(jieba.cut(content))
        matched_types = []
        knowledge = []
        literacy = []

        for type_name, rule in cls.RULES.items():
            hits = tokens.intersection(rule['keywords'])
            if len(hits) < 2:
                # A single keyword is not enough evidence for this type.
                continue
            matched_types.append(type_name)
            knowledge += rule['knowledge']
            literacy += rule['literacy']

        return {
            'problem_types': matched_types,
            'knowledge_points': knowledge,
            'literacy_points': literacy,
        }


# ================== 大模型客户端 ==================
class LLMClient:
    """Optional HTTP client that asks a chat-completions endpoint about a problem.

    Every failure path degrades to an empty dict so callers can fall back to
    the local rule engine.
    """

    def __init__(self):
        """Read the on/off switch and endpoint settings from configuration."""
        self.enabled = Config.LLM_ENABLED
        self.base_url = MODEL_API_URL
        self.headers = {"Authorization": f"Bearer {MODEL_API_KEY}"}

    def analyze_problem(self, content: str) -> dict:
        """Send *content* to the model, retrying on transport errors.

        Args:
            content: The problem text to analyse.

        Returns:
            The JSON object extracted from the model's reply, or ``{}`` when
            the client is disabled, every attempt failed, or the reply could
            not be parsed.
        """
        if not self.enabled:
            return {}

        # NOTE(review): the prompt does not ask the model to answer in JSON,
        # yet _parse_response only accepts a JSON object -- confirm whether
        # the prompt should spell out the expected keys.
        payload = {
            "model": MODEL_NAME,
            "messages": [{
                "role": "user",
                "content": f"分析数学题目：{content}"
            }],
            "temperature": 0.3
        }

        for attempt in range(Config.LLM_MAX_RETRIES):
            try:
                response = requests.post(
                    f"{self.base_url}/chat/completions",
                    headers=self.headers,
                    json=payload,
                    timeout=Config.LLM_TIMEOUT
                )
                response.raise_for_status()
                return self._parse_response(response.json())
            except (requests.exceptions.RequestException, ValueError) as e:
                # ValueError covers a non-JSON response body: older requests
                # releases raise it from Response.json() instead of a
                # RequestException subclass, which used to escape this loop.
                print(f"🌐 网络错误（尝试 {attempt + 1}/{Config.LLM_MAX_RETRIES}）: {str(e)}")
        return {}

    def _parse_response(self, response: dict) -> dict:
        """Extract the first ``{...}`` span from the reply text and decode it.

        Args:
            response: Decoded response body; the reply text is read from
                ``response['choices'][0]['message']['content']``.

        Returns:
            The decoded JSON object, or ``{}`` if the expected fields are
            missing, no brace-delimited span exists, or it is not valid JSON.
        """
        try:
            content = response['choices'][0]['message']['content']
            # Greedy match with DOTALL: from the first '{' to the last '}',
            # across line breaks.
            match = re.search(r'\{.*\}', content, re.DOTALL)
            if match is None:
                raise ValueError("响应中未找到JSON对象")
            parsed = json.loads(match.group())
        except (KeyError, IndexError, TypeError, ValueError) as e:
            print(f"🔴 解析失败: {str(e)}")
            return {}
        # Defensive: callers use dict.get() on the result.
        return parsed if isinstance(parsed, dict) else {}


# ================== 知识图谱管理 ==================
class KnowledgeManager:
    """Neo4j access layer: cleans the graph, caches lookup tables, stores results."""

    def __init__(self):
        """Connect to Neo4j, run the cleanup query, then load both lookup maps."""
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
        self._clean_data()
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()

    def _clean_data(self):
        """Delete test nodes and nodes that lack an ``id`` property.

        LiteracyNode is exempt from the missing-``id`` rule: this class reads
        and merges those nodes by ``value``/``title`` only, so an unconditional
        ``n.id IS NULL`` filter would detach-delete them on every start-up.
        NOTE(review): assumes LiteracyNode carries no ``id`` -- confirm
        against the real graph schema.
        """
        self.graph.run("""
            MATCH (n)
            WHERE n.name CONTAINS '测试'
               OR (n.id IS NULL AND NOT n:LiteracyNode)
            DETACH DELETE n
        """)

    def _load_knowledge(self) -> Dict[str, str]:
        """Return ``{id: name}`` for every KnowledgePoint node."""
        return {rec['n.id']: rec['n.name']
                for rec in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}

    def _load_literacy(self) -> Dict[str, str]:
        """Return ``{value: title}`` for every LiteracyNode node."""
        return {rec['n.value']: rec['n.title']
                for rec in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}

    @staticmethod
    def _first_key_for(mapping, wanted):
        """Return the first key of *mapping* whose value equals *wanted*, else None."""
        return next((key for key, value in mapping.items() if value == wanted), None)

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
        """Upsert the question node and link it to known knowledge/literacy nodes.

        All statements run in one transaction. On any error the transaction is
        rolled back and the failure is printed; the exception is not re-raised.

        Args:
            question_id: Value of the Question node's ``id`` property.
            content: Text stored in the Question node's ``content`` property.
            knowledge: Knowledge-point names; names absent from
                ``knowledge_map`` are skipped.
            literacy: Literacy titles; titles absent from ``literacy_map``
                are skipped.
        """
        tx = self.graph.begin()
        try:
            # Values travel as query parameters rather than being spliced into
            # the Cypher text, so quotes or backslashes in the content cannot
            # break or alter the statement, and ids keep their stored type.
            tx.run(
                """
                MERGE (q:Question {id: $question_id})
                SET q.content = $content
                """,
                {"question_id": question_id, "content": content},
            )

            # Link knowledge points that exist in the graph.
            for kp_name in knowledge:
                kp_id = self._first_key_for(self.knowledge_map, kp_name)
                if kp_id is None:
                    continue
                tx.run(
                    """
                    MERGE (kp:KnowledgePoint {id: $kp_id})
                    WITH kp
                    MATCH (q:Question {id: $question_id})
                    MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)
                    """,
                    {"kp_id": kp_id, "question_id": question_id},
                )

            # Link literacy points that exist in the graph.
            for lit_name in literacy:
                lit_id = self._first_key_for(self.literacy_map, lit_name)
                if lit_id is None:
                    continue
                tx.run(
                    """
                    MERGE (lp:LiteracyNode {value: $lit_id})
                    WITH lp
                    MATCH (q:Question {id: $question_id})
                    MERGE (q)-[:DEVELOPS_LITERACY]->(lp)
                    """,
                    {"lit_id": lit_id, "question_id": question_id},
                )

            self.graph.commit(tx)
            print("✅ 数据存储成功")
        except Exception as e:
            # Best-effort boundary: undo the partial write and report it.
            self.graph.rollback(tx)
            print(f"❌ 存储失败: {str(e)}")


# ================== 核心分析引擎 ==================
class ProblemAnalyzer:
    """Analyse one problem: local rules plus optional LLM, then persist the result."""

    def __init__(self, content: str):
        """Prepare the text, derive a stable id, and create the collaborators.

        Args:
            content: Raw problem text. The id is the first 12 hex characters
                of the SHA-256 digest of this raw text.
        """
        self.original = content
        self.content = self._preprocess(content)
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
        self.llm = LLMClient()

    def _preprocess(self, text: str) -> str:
        """Drop every non-word character and cap the length.

        Returns:
            *text* without punctuation/whitespace, truncated to
            ``Config.MAX_CONTENT_LENGTH`` characters.
        """
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]

    @staticmethod
    def _merge_unique(primary, secondary, limit: int) -> List[str]:
        """Merge two label collections, de-duplicated, in first-seen order.

        Items of *primary* come first, so truncation to *limit* always keeps
        the same labels for the same inputs. A bare string counts as a single
        label; any other non-list/tuple value is ignored, as are items that
        are not non-empty strings (the LLM reply is not guaranteed to have
        the expected shape).
        """
        seen = {}
        for source in (primary, secondary):
            if isinstance(source, str):
                source = [source]
            elif not isinstance(source, (list, tuple)):
                continue
            for item in source:
                if isinstance(item, str) and item:
                    seen.setdefault(item, None)
        return list(seen)[:limit]

    def analyze(self) -> dict:
        """Run both analysers and fuse their output.

        Returns:
            A dict with ``problem_id``, up to 3 ``problem_types``, up to 2
            ``knowledge_points`` and up to 2 ``literacy_points``. Local-rule
            labels take precedence over LLM labels when truncating.
        """
        # Local rule analysis runs on the cleaned text.
        local_result = LocalKnowledgeBase.analyze(self.content)

        # LLM analysis receives the untouched original text.
        llm_result = self.llm.analyze_problem(self.original)
        if not isinstance(llm_result, dict):
            llm_result = {}

        # Fuse the two result sets.
        return {
            "problem_id": self.question_id,
            "problem_types": self._merge_unique(
                local_result.get('problem_types'),
                llm_result.get('problem_types'),
                3,
            ),
            "knowledge_points": self._merge_unique(
                local_result.get('knowledge_points'),
                llm_result.get('knowledge_points'),
                2,
            ),
            "literacy_points": self._merge_unique(
                local_result.get('literacy_points'),
                llm_result.get('literacy_points'),
                2,
            ),
        }

    def execute(self):
        """Analyse the problem, print a report, and store it in the graph."""
        print(f"\n🔍 开始分析题目：{self.original[:50]}...")

        analysis = self.analyze()

        print("\n📊 分析报告：")
        print(f"  题型识别：{analysis.get('problem_types', [])}")
        print(f"  推荐知识点：{analysis.get('knowledge_points', [])}")
        print(f"  关联素养点：{analysis.get('literacy_points', [])}")

        # Persist to the knowledge graph.
        self.kg.store_analysis(
            question_id=analysis['problem_id'],
            content=self.content,
            knowledge=analysis.get('knowledge_points', []),
            literacy=analysis.get('literacy_points', [])
        )


# ================== 测试用例 ==================
if __name__ == '__main__':
    # Demo run: five sample word problems, each analysed and stored in turn.
    test_cases = [
        "小明用50元买了3本笔记本，每本8元，还剩多少钱？",
        "甲乙两车相距300公里，甲车速度60km/h，乙车40km/h，几小时后相遇？",
        "一项工程甲队单独做需要10天，乙队需要15天，两队合作需要多少天？",
        "一个长方形长8cm，宽5cm，求面积和周长",
        "含盐20%的盐水500克，要配成15%的盐水，需加水多少克？",
    ]
    divider = "\n" + "=" * 80

    for question in test_cases:
        print(divider)
        print(f"📚 处理题目：{question}")
        ProblemAnalyzer(question).execute()
-												'commit'

											
										
										
											5 months ago
+								# -*- coding: utf-8 -*-
-												'commit'

											
										
										
											5 months ago
+								"""
 								数学题目分析系统 v5.0（离线可用版）
 								功能特性：
 . 本地规则引擎为主 + 大模型增强（可选）
 . 自动Neo4j数据清洗
 . 完善的错误处理
 . 详细的运行日志
 								"""
-												'commit'

											
										
										
											5 months ago
+								from Config import *
-												'commit'

											
										
										
											5 months ago
-												'commit'

											
										
										
											5 months ago
+								import hashlib
-												'commit'

											
										
										
											5 months ago
+								import json
 								import re
 								from typing import Dict, List
 								import jieba
 								import requests
-												'commit'

											
										
										
											5 months ago
+								from py2neo import Graph
 								from Config import *
-												'commit'

											
										
										
											5 months ago
-												'commit'

											
										
										
											5 months ago
+								# 初始化分词器
 								jieba.initialize()
-												'commit'

											
										
										
											5 months ago
-												'commit'

											
										
										
											5 months ago
-												'commit'

											
										
										
											5 months ago
+								# ================== 配置区 ==================
 								class Config:
-												'commit'

											
										
										
											5 months ago
+								    # 大模型配置（默认关闭）
-												'commit'

											
										
										
											5 months ago
+								    LLM_ENABLED = True  # 设置为True启用大模型
-												'commit'

											
										
										
											5 months ago
+								    LLM_TIMEOUT = 8
 								    LLM_MAX_RETRIES = 2
-												'commit'

											
										
										
											5 months ago
+								    # 系统参数
 								    MAX_CONTENT_LENGTH = 500
-												'commit'

											
										
										
											5 months ago
-												'commit'

											
										
										
											5 months ago
+								# ================== 本地知识库 ==================
-												'commit'

											
										
										
											5 months ago
+								class LocalKnowledgeBase:
-												'commit'

											
										
										
											5 months ago
+								    """本地规则分析引擎"""
-												'commit'

											
										
										
											5 months ago
+								    RULES = {
 								        'arithmetic': {
 								            'keywords': ['买', '卖', '元', '还剩', '单价', '总价'],
 								            'knowledge': ['四则运算应用（购物问题）'],
 								            'literacy': ['数感培养']
 								        },
 								        'travel': {
 								            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
 								            'knowledge': ['相遇问题解决方案'],
 								            'literacy': ['空间观念']
 								        },
 								        'work': {
 								            'keywords': ['合作', '效率', '工期', '完成', '单独'],
 								            'knowledge': ['工程合作效率计算'],
 								            'literacy': ['模型思想']
 								        },
 								        'geometry': {
 								            'keywords': ['面积', '周长', '体积', '平方', '立方'],
 								            'knowledge': ['几何图形面积计算'],
 								            'literacy': ['空间观念']
 								        },
 								        'ratio': {
 								            'keywords': ['百分比', '浓度', '稀释', '配比'],
 								            'knowledge': ['浓度问题配比计算'],
 								            'literacy': ['数据分析']
 								        }
 								    }
-												'commit'

											
										
										
											5 months ago
-												'commit'

											
										
										
											5 months ago
+								    @classmethod
 								    def analyze(cls, content: str) -> dict:
 								        """本地规则分析"""
 								        result = {
 								            'problem_types': [],
 								            'knowledge_points': [],
 								            'literacy_points': []
 								        }
 								        words = set(jieba.cut(content))
 								        for ptype, config in cls.RULES.items():
-												'commit'

											
										
										
											5 months ago
+								            if len(words & set(config['keywords'])) >= 2:
-												'commit'

											
										
										
											5 months ago
+								                result['problem_types'].append(ptype)
 								                result['knowledge_points'].extend(config['knowledge'])
 								                result['literacy_points'].extend(config['literacy'])
 								        return result
-												'commit'

											
										
										
											5 months ago
-												'commit'

											
										
										
											5 months ago
-												'commit'

											
										
										
											5 months ago
+								# ================== 大模型客户端 ==================
-												'commit'

											
										
										
											5 months ago
+								class LLMClient:
-												'commit'

											
										
										
											5 months ago
+								    """增强版大模型客户端"""
-												'commit'

											
										
										
											5 months ago
 								    def __init__(self):
 								        self.enabled = Config.LLM_ENABLED
 								        self.base_url = MODEL_API_URL
-												'commit'

											
										
										
											5 months ago
+								        self.headers = {"Authorization": f"Bearer {MODEL_API_KEY}"}
-												'commit'

											
										
										
											5 months ago
 								    def analyze_problem(self, content: str) -> dict:
-												'commit'

											
										
										
											5 months ago
+								        """带重试机制的分析请求"""
-												'commit'

											
										
										
											5 months ago
+								        if not self.enabled:
 								            return {}
-												'commit'

											
										
										
											5 months ago
-												'commit'

											
										
										
											5 months ago
+								        for attempt in range(Config.LLM_MAX_RETRIES):
 								            try:
 								                response = requests.post(
 								                    f"{self.base_url}/chat/completions",
 								                    headers=self.headers,
 								                    json={
 								                        "model": MODEL_NAME,
 								                        "messages": [{
 								                            "role": "user",
 								                            "content": f"分析数学题目：{content}"
 								                        }],
 								                        "temperature": 0.3
 								                    },
 								                    timeout=Config.LLM_TIMEOUT
 								                )
 								                response.raise_for_status()
 								                return self._parse_response(response.json())
 								            except requests.exceptions.RequestException as e:
 								                print(f"🌐 网络错误（尝试 {attempt + 1}/{Config.LLM_MAX_RETRIES}）: {str(e)}")
 								        return {}
 								    def _parse_response(self, response: dict) -> dict:
-												'commit'

											
										
										
											5 months ago
+								        """解析大模型响应"""
-												'commit'

											
										
										
											5 months ago
+								        try:
-												'commit'

											
										
										
											5 months ago
+								            content = response['choices'][0]['message']['content']
 								            return json.loads(re.search(r'\{.*\}', content, re.DOTALL).group())
 								        except Exception as e:
 								            print(f"🔴 解析失败: {str(e)}")
-												'commit'

											
										
										
											5 months ago
+								            return {}
-												'commit'

											
										
										
											5 months ago
-												'commit'

											
										
										
											5 months ago
-												'commit'

											
										
										
											5 months ago
+								# ================== 知识图谱管理 ==================
-												'commit'

											
										
										
											5 months ago
+								class KnowledgeManager:
-												'commit'

											
										
										
											5 months ago
+								    """增强版知识图谱管理器"""
-												'commit'

											
										
										
											5 months ago
+								    def __init__(self):
-												'commit'

											
										
										
											5 months ago
+								        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
-												'commit'

											
										
										
											5 months ago
+								        self._clean_data()
 								        self.knowledge_map = self._load_knowledge()
 								        self.literacy_map = self._load_literacy()
 								    def _clean_data(self):
-												'commit'

											
										
										
											5 months ago
+								        """自动数据清洗"""
-												'commit'

											
										
										
											5 months ago
+								        self.graph.run("""
 								            MATCH (n)
 								            WHERE n.name CONTAINS '测试' OR n.id IS NULL
 								            DETACH DELETE n
 								        """)
 								    def _load_knowledge(self) -> Dict[str, str]:
 								        """加载知识点"""
-												'commit'

											
										
										
											5 months ago
+								        return {rec['n.id']: rec['n.name']
 								                for rec in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}
-												'commit'

											
										
										
											5 months ago
 								    def _load_literacy(self) -> Dict[str, str]:
 								        """加载素养点"""
-												'commit'

											
										
										
											5 months ago
+								        return {rec['n.value']: rec['n.title']
 								                for rec in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
-												'commit'

											
										
										
											5 months ago
 								    def store_analysis(self, question_id: str, content: str,
 								                       knowledge: List[str], literacy: List[str]):
-												'commit'

											
										
										
											5 months ago
+								        """事务化存储方法"""
 								        tx = self.graph.begin()
-												'commit'

											
										
										
											5 months ago
+								        try:
-												'commit'

											
										
										
											5 months ago
+								            # 转义特殊字符
 								            safe_content = content.replace("'", "\\'")
 								            # 创建/更新题目节点
 								            tx.run(f"""
 								                MERGE (q:Question {{id: '{question_id}'}})
 								                SET q.content = '{safe_content}'
 								            """)
-												'commit'

											
										
										
											5 months ago
 								            # 关联知识点
 								            for kp_name in knowledge:
-												'commit'

											
										
										
											5 months ago
+								                if kp_id := next((k for k, v in self.knowledge_map.items() if v == kp_name), None):
-												'commit'

											
										
										
											5 months ago
+								                    tx.run(f"""
-												'commit'

											
										
										
											5 months ago
+								                        MERGE (kp:KnowledgePoint {{id: '{kp_id}'}})
-												'commit'

											
										
										
											5 months ago
+								                        WITH kp
-												'commit'

											
										
										
											5 months ago
+								                        MATCH (q:Question {{id: '{question_id}'}})
 								                        MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)
 								                    """)
 								            # 关联素养点
 								            for lit_name in literacy:
-												'commit'

											
										
										
											5 months ago
+								                if lit_id := next((k for k, v in self.literacy_map.items() if v == lit_name), None):
-												'commit'

											
										
										
											5 months ago
+								                    tx.run(f"""
-												'commit'

											
										
										
											5 months ago
+								                        MERGE (lp:LiteracyNode {{value: '{lit_id}'}})
-												'commit'

											
										
										
											5 months ago
+								                        WITH lp
-												'commit'

											
										
										
											5 months ago
+								                        MATCH (q:Question {{id: '{question_id}'}})
 								                        MERGE (q)-[:DEVELOPS_LITERACY]->(lp)
 								                    """)
-												'commit'

											
										
										
											5 months ago
-												'commit'

											
										
										
											5 months ago
+								            self.graph.commit(tx)
-												'commit'

											
										
										
											5 months ago
+								            print("✅ 数据存储成功")
-												'commit'

											
										
										
											5 months ago
+								        except Exception as e:
-												'commit'

											
										
										
											5 months ago
+								            self.graph.rollback(tx)
-												'commit'

											
										
										
											5 months ago
+								            print(f"❌ 存储失败: {str(e)}")
-												'commit'

											
										
										
											5 months ago
 								# ================== 核心分析引擎 ==================
-												'commit'

											
										
										
											5 months ago
+								class ProblemAnalyzer:
-												'commit'

											
										
										
											5 months ago
+								    """题目分析处理器"""
-												'commit'

											
										
										
											5 months ago
 								    def __init__(self, content: str):
 								        self.original = content
 								        self.content = self._preprocess(content)
 								        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
 								        self.kg = KnowledgeManager()
 								        self.llm = LLMClient()
 								    def _preprocess(self, text: str) -> str:
 								        """文本预处理"""
 								        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]
 								    def analyze(self) -> dict:
 								        """执行分析流程"""
 								        # 本地规则分析
 								        local_result = LocalKnowledgeBase.analyze(self.content)
-												'commit'

											
										
										
											5 months ago
+								        # 大模型分析
-												'commit'

											
										
										
											5 months ago
+								        llm_result = self.llm.analyze_problem(self.original)
 								        # 结果融合
 								        return {
 								            "problem_id": self.question_id,
 								            "problem_types": list(set(
 								                local_result.get('problem_types', []) +
 								                llm_result.get('problem_types', [])
 								            ))[:3],
 								            "knowledge_points": list(set(
 								                local_result.get('knowledge_points', []) +
 								                llm_result.get('knowledge_points', [])
 								            ))[:2],
 								            "literacy_points": list(set(
 								                local_result.get('literacy_points', []) +
 								                llm_result.get('literacy_points', [])
 								            ))[:2]
 								        }
 								    def execute(self):
 								        """执行完整流程"""
 								        print(f"\n🔍 开始分析题目：{self.original[:50]}...")
 								        analysis = self.analyze()
 								        print("\n📊 分析报告：")
 								        print(f"  题型识别：{analysis.get('problem_types', [])}")
 								        print(f"  推荐知识点：{analysis.get('knowledge_points', [])}")
 								        print(f"  关联素养点：{analysis.get('literacy_points', [])}")
 								        # 存储到知识图谱
 								        self.kg.store_analysis(
 								            question_id=analysis['problem_id'],
 								            content=self.content,
 								            knowledge=analysis.get('knowledge_points', []),
 								            literacy=analysis.get('literacy_points', [])
 								        )
 								# ================== 测试用例 ==================
-												'commit'

											
										
										
											5 months ago
+								if __name__ == '__main__':
-												'commit'

											
										
										
											5 months ago
+								    test_cases = [
 								        "小明用50元买了3本笔记本，每本8元，还剩多少钱？",
 								        "甲乙两车相距300公里，甲车速度60km/h，乙车40km/h，几小时后相遇？",
 								        "一项工程甲队单独做需要10天，乙队需要15天，两队合作需要多少天？",
 								        "一个长方形长8cm，宽5cm，求面积和周长",
 								        "含盐20%的盐水500克，要配成15%的盐水，需加水多少克？"
 								    ]
 								    for question in test_cases:
 								        print("\n" + "=" * 80)
-												'commit'

											
										
										
											5 months ago
+								        print(f"📚 处理题目：{question}")
-												'commit'

											
										
										
											5 months ago
+								        analyzer = ProblemAnalyzer(question)
 								        analyzer.execute()