'commit'

5 months ago · e431f7c354
parent 0e2585110e
commit e431f7c354
1 changed files with 117 additions and 167 deletions
--- a/AI/Neo4j/N3_InputShiTi.py
+++ b/AI/Neo4j/N3_InputShiTi.py
@@ -1,194 +1,162 @@
 # -*- coding: utf-8 -*-
 """
-数学题目分析系统 v5.0（离线可用版）
+数学题目分析系统 v6.1（严格匹配版）
 功能特性：
-1. 本地规则引擎为主 + 大模型增强（可选）
-2. 自动Neo4j数据清洗
-3. 完善的错误处理
-4. 详细的运行日志
+1. 纯大模型分析
+2. 流式响应处理
+3. 严格匹配已有节点
+4. 不创建新知识点/素养点
 """
-from Config import *

-import hashlib
-import json
 import re
+import json
+import hashlib
 from typing import Dict, List
-
-import jieba
 import requests
 from py2neo import Graph
 from Config import *

-# 初始化分词器
-jieba.initialize()
-

 # ================== 配置区 ==================
 class Config:
-    # 大模型配置（默认关闭）
-    LLM_ENABLED = True  # 设置为True启用大模型
-    LLM_TIMEOUT = 8
-    LLM_MAX_RETRIES = 2
+    # Neo4j配置
+    NEO4J_URI = NEO4J_URI
+    NEO4J_AUTH = NEO4J_AUTH

-    # 系统参数
-    MAX_CONTENT_LENGTH = 500
+    # 大模型配置（示例为阿里云配置）
+    MODEL_API_URL = MODEL_API_URL
+    MODEL_API_KEY = MODEL_API_KEY
+    MODEL_NAME = MODEL_NAME

+    # 超时配置
+    STREAM_TIMEOUT = 30  # 流式响应总超时
+    CHUNK_TIMEOUT = 5  # 单次数据块等待超时

-# ================== 本地知识库 ==================
-class LocalKnowledgeBase:
-    """本地规则分析引擎"""
-    RULES = {
-        'arithmetic': {
-            'keywords': ['买', '卖', '元', '还剩', '单价', '总价'],
-            'knowledge': ['四则运算应用（购物问题）'],
-            'literacy': ['数感培养']
-        },
-        'travel': {
-            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
-            'knowledge': ['相遇问题解决方案'],
-            'literacy': ['空间观念']
-        },
-        'work': {
-            'keywords': ['合作', '效率', '工期', '完成', '单独'],
-            'knowledge': ['工程合作效率计算'],
-            'literacy': ['模型思想']
-        },
-        'geometry': {
-            'keywords': ['面积', '周长', '体积', '平方', '立方'],
-            'knowledge': ['几何图形面积计算'],
-            'literacy': ['空间观念']
-        },
-        'ratio': {
-            'keywords': ['百分比', '浓度', '稀释', '配比'],
-            'knowledge': ['浓度问题配比计算'],
-            'literacy': ['数据分析']
-        }
-    }
-
-    @classmethod
-    def analyze(cls, content: str) -> dict:
-        """本地规则分析"""
-        result = {
-            'problem_types': [],
-            'knowledge_points': [],
-            'literacy_points': []
-        }
-
-        words = set(jieba.cut(content))
-        for ptype, config in cls.RULES.items():
-            if len(words & set(config['keywords'])) >= 2:
-                result['problem_types'].append(ptype)
-                result['knowledge_points'].extend(config['knowledge'])
-                result['literacy_points'].extend(config['literacy'])
-        return result
+    # 系统参数
+    MAX_CONTENT_LENGTH = 500


-# ================== 大模型客户端 ==================
-class LLMClient:
-    """增强版大模型客户端"""
+# ================== 流式大模型客户端 ==================
+class StreamLLMClient:
+    """支持流式响应的大模型客户端"""

    def __init__(self):
-        self.enabled = Config.LLM_ENABLED
-        self.base_url = MODEL_API_URL
-        self.headers = {"Authorization": f"Bearer {MODEL_API_KEY}"}
+        self.base_url = Config.MODEL_API_URL
+        self.headers = {
+            "Authorization": f"Bearer {Config.MODEL_API_KEY}",
+            "Content-Type": "application/json"
+        }

    def analyze_problem(self, content: str) -> dict:
-        """带重试机制的分析请求"""
-        if not self.enabled:
-            return {}
-
-        for attempt in range(Config.LLM_MAX_RETRIES):
-            try:
-                response = requests.post(
-                    f"{self.base_url}/chat/completions",
-                    headers=self.headers,
-                    json={
-                        "model": MODEL_NAME,
-                        "messages": [{
-                            "role": "user",
-                            "content": f"分析数学题目：{content}"
-                        }],
-                        "temperature": 0.3
-                    },
-                    timeout=Config.LLM_TIMEOUT
-                )
-                response.raise_for_status()
-                return self._parse_response(response.json())
-            except requests.exceptions.RequestException as e:
-                print(f"🌐 网络错误（尝试 {attempt + 1}/{Config.LLM_MAX_RETRIES}）: {str(e)}")
-        return {}
-
-    def _parse_response(self, response: dict) -> dict:
-        """解析大模型响应"""
+        """流式响应分析"""
        try:
-            content = response['choices'][0]['message']['content']
-            return json.loads(re.search(r'\{.*\}', content, re.DOTALL).group())
+            response = requests.post(
+                f"{self.base_url}/chat/completions",
+                headers=self.headers,
+                json={
+                    "model": Config.MODEL_NAME,
+                    "messages": [{
+                        "role": "user",
+                        "content": f"""请严格按JSON格式分析数学题目：
+{{
+    "problem_types": ["题型列表"],
+    "knowledge_points": ["知识点名称（必须与数据库完全一致）"],
+    "literacy_points": ["素养点名称（必须与数据库完全一致）"]
+}}
+题目：{content}"""
+                    }],
+                    "temperature": 0.2,
+                    "stream": True
+                },
+                timeout=Config.STREAM_TIMEOUT,
+                stream=True
+            )
+            response.raise_for_status()
+
+            return self._process_stream(response)
+
+        except requests.exceptions.RequestException as e:
+            print(f"🌐 网络错误: {str(e)}")
+            return {}
        except Exception as e:
            print(f"🔴 解析失败: {str(e)}")
            return {}

+    def _process_stream(self, response) -> dict:
+        """处理流式响应"""
+        full_content = ""
+        for chunk in response.iter_lines():
+            if chunk:
+                decoded_chunk = chunk.decode('utf-8')
+                if decoded_chunk.startswith("data:"):
+                    try:
+                        chunk_data = json.loads(decoded_chunk[5:])
+                        content = chunk_data['choices'][0]['delta'].get('content', '')
+                        full_content += content
+                    except:
+                        continue
+
+        try:
+            json_str = re.search(r'\{.*\}', full_content, re.DOTALL).group()
+            return json.loads(json_str)
+        except:
+            print("🔴 无法解析大模型输出")
+            return {}
+

 # ================== 知识图谱管理 ==================
 class KnowledgeManager:
-    """增强版知识图谱管理器"""
+    """严格匹配型知识图谱管理器"""

    def __init__(self):
-        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
-        self._clean_data()
+        self.graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()

-    def _clean_data(self):
-        """自动数据清洗"""
-        self.graph.run("""
-            MATCH (n) 
-            WHERE n.name CONTAINS '测试' OR n.id IS NULL
-            DETACH DELETE n
-        """)
-
    def _load_knowledge(self) -> Dict[str, str]:
-        """加载知识点"""
+        """加载知识点映射（id -> name）"""
        return {rec['n.id']: rec['n.name']
                for rec in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}

    def _load_literacy(self) -> Dict[str, str]:
-        """加载素养点"""
+        """加载素养点映射（value -> title）"""
        return {rec['n.value']: rec['n.title']
                for rec in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
-        """事务化存储方法"""
+        """使用参数化查询解决转义问题"""
        tx = self.graph.begin()
        try:
-            # 转义特殊字符
-            safe_content = content.replace("'", "\\'")
-
-            # 创建/更新题目节点
-            tx.run(f"""
-                MERGE (q:Question {{id: '{question_id}'}})
-                SET q.content = '{safe_content}'
-            """)
-
-            # 关联知识点
+            # 使用参数化查询避免转义问题
+            tx.run(
+                "MERGE (q:Question {id: $question_id}) "
+                "SET q.content = $content",
+                {
+                    "question_id": question_id,
+                    "content": content
+                }
+            )
+
+            # 关联知识点（参数化版本）
            for kp_name in knowledge:
                if kp_id := next((k for k, v in self.knowledge_map.items() if v == kp_name), None):
-                    tx.run(f"""
-                        MERGE (kp:KnowledgePoint {{id: '{kp_id}'}})
-                        WITH kp
-                        MATCH (q:Question {{id: '{question_id}'}})
-                        MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)
-                    """)
-
-            # 关联素养点
+                    tx.run(
+                        "MATCH (kp:KnowledgePoint {id: $kp_id}) "
+                        "MATCH (q:Question {id: $qid}) "
+                        "MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)",
+                        {"kp_id": kp_id, "qid": question_id}
+                    )
+
+            # 关联素养点（参数化版本）
            for lit_name in literacy:
                if lit_id := next((k for k, v in self.literacy_map.items() if v == lit_name), None):
-                    tx.run(f"""
-                        MERGE (lp:LiteracyNode {{value: '{lit_id}'}})
-                        WITH lp
-                        MATCH (q:Question {{id: '{question_id}'}})
-                        MERGE (q)-[:DEVELOPS_LITERACY]->(lp)
-                    """)
+                    tx.run(
+                        "MATCH (lp:LiteracyNode {value: $lit_id}) "
+                        "MATCH (q:Question {id: $qid}) "
+                        "MERGE (q)-[:DEVELOPS_LITERACY]->(lp)",
+                        {"lit_id": lit_id, "qid": question_id}
+                    )

            self.graph.commit(tx)
            print("✅ 数据存储成功")
@@ -199,42 +167,27 @@ class KnowledgeManager:

 # ================== 核心分析引擎 ==================
 class ProblemAnalyzer:
-    """题目分析处理器"""
+    """纯大模型分析引擎"""

    def __init__(self, content: str):
        self.original = content
        self.content = self._preprocess(content)
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
-        self.llm = LLMClient()
+        self.llm = StreamLLMClient()

    def _preprocess(self, text: str) -> str:
        """文本预处理"""
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]

    def analyze(self) -> dict:
-        """执行分析流程"""
-        # 本地规则分析
-        local_result = LocalKnowledgeBase.analyze(self.content)
-
-        # 大模型分析
-        llm_result = self.llm.analyze_problem(self.original)
-
-        # 结果融合
+        """纯大模型分析"""
+        result = self.llm.analyze_problem(self.original)
        return {
            "problem_id": self.question_id,
-            "problem_types": list(set(
-                local_result.get('problem_types', []) +
-                llm_result.get('problem_types', [])
-            ))[:3],
-            "knowledge_points": list(set(
-                local_result.get('knowledge_points', []) +
-                llm_result.get('knowledge_points', [])
-            ))[:2],
-            "literacy_points": list(set(
-                local_result.get('literacy_points', []) +
-                llm_result.get('literacy_points', [])
-            ))[:2]
+            "problem_types": result.get('problem_types', [])[:3],
+            "knowledge_points": result.get('knowledge_points', [])[:2],
+            "literacy_points": result.get('literacy_points', [])[:2]
        }

    def execute(self):
@@ -243,7 +196,7 @@ class ProblemAnalyzer:

        analysis = self.analyze()

-        print("\n📊 分析报告：")
+        print("\n📊 大模型分析报告：")
        print(f"  题型识别：{analysis.get('problem_types', [])}")
        print(f"  推荐知识点：{analysis.get('knowledge_points', [])}")
        print(f"  关联素养点：{analysis.get('literacy_points', [])}")
@@ -261,14 +214,11 @@ class ProblemAnalyzer:
 if __name__ == '__main__':
    test_cases = [
        "小明用50元买了3本笔记本，每本8元，还剩多少钱？",
-        "甲乙两车相距300公里，甲车速度60km/h，乙车40km/h，几小时后相遇？",
-        "一项工程甲队单独做需要10天，乙队需要15天，两队合作需要多少天？",
-        "一个长方形长8cm，宽5cm，求面积和周长",
-        "含盐20%的盐水500克，要配成15%的盐水，需加水多少克？"
+        "甲乙两车相距300公里，甲车速度60km/h，乙车40km/h，几小时后相遇？"
    ]

    for question in test_cases:
        print("\n" + "=" * 80)
        print(f"📚 处理题目：{question}")
        analyzer = ProblemAnalyzer(question)
-        analyzer.execute()
+        analyzer.execute()