'commit'

5 months ago · 81d2539f21
parent e37db2d656
commit 81d2539f21
2 changed files with 184 additions and 163 deletions
--- a/AI/Neo4j/N3_InputShiTi.py
+++ b/AI/Neo4j/N3_InputShiTi.py
@ -1,235 +1,256 @@
 # -*- coding: utf-8 -*-
 """
-数学题目分析系统 v6.1（严格匹配版）
-功能特性：
-1. 纯大模型分析
-2. 流式响应处理
-3. 严格匹配已有节点
-4. 不创建新知识点/素养点
+数学题目分析系统 v6.3（稳定流式处理版）
 """

 import re
 import json
 import hashlib
-from typing import Dict, List
 import requests
 from py2neo import Graph
-from Config import *
+from typing import Dict, List
+from Config import NEO4J_URI, NEO4J_AUTH, MODEL_API_URL, MODEL_API_KEY, MODEL_NAME


-# ================== 配置区 ==================
+# ================== 配置类 ==================
 class Config:
-    # Neo4j配置
    NEO4J_URI = NEO4J_URI
    NEO4J_AUTH = NEO4J_AUTH
-
-    # 大模型配置（示例为阿里云配置）
    MODEL_API_URL = MODEL_API_URL
    MODEL_API_KEY = MODEL_API_KEY
    MODEL_NAME = MODEL_NAME
-
-    # 超时配置
-    STREAM_TIMEOUT = 30  # 流式响应总超时
-    CHUNK_TIMEOUT = 5  # 单次数据块等待超时
-
-    # 系统参数
    MAX_CONTENT_LENGTH = 500
-
-
-# ================== 流式大模型客户端 ==================
-class StreamLLMClient:
-    """支持流式响应的大模型客户端"""
-
-    def __init__(self):
-        self.base_url = Config.MODEL_API_URL
-        self.headers = {
-            "Authorization": f"Bearer {Config.MODEL_API_KEY}",
-            "Content-Type": "application/json"
-        }
-
-    def analyze_problem(self, content: str) -> dict:
-        """流式响应分析"""
-        try:
-            response = requests.post(
-                f"{self.base_url}/chat/completions",
-                headers=self.headers,
-                json={
-                    "model": Config.MODEL_NAME,
-                    "messages": [{
-                        "role": "user",
-                        "content": f"""请严格按JSON格式分析数学题目：
-{{
-    "problem_types": ["题型列表"],
-    "knowledge_points": ["知识点名称（必须与数据库完全一致）"],
-    "literacy_points": ["素养点名称（必须与数据库完全一致）"]
-}}
-题目：{content}"""
-                    }],
-                    "temperature": 0.2,
-                    "stream": True
-                },
-                timeout=Config.STREAM_TIMEOUT,
-                stream=True
-            )
-            response.raise_for_status()
-
-            return self._process_stream(response)
-
-        except requests.exceptions.RequestException as e:
-            print(f"🌐 网络错误: {str(e)}")
-            return {}
-        except Exception as e:
-            print(f"🔴 解析失败: {str(e)}")
-            return {}
-
-    def _process_stream(self, response) -> dict:
-        """处理流式响应"""
-        full_content = ""
-        for chunk in response.iter_lines():
-            if chunk:
-                decoded_chunk = chunk.decode('utf-8')
-                if decoded_chunk.startswith("data:"):
-                    try:
-                        chunk_data = json.loads(decoded_chunk[5:])
-                        content = chunk_data['choices'][0]['delta'].get('content', '')
-                        full_content += content
-                    except:
-                        continue
-
-        try:
-            json_str = re.search(r'\{.*\}', full_content, re.DOTALL).group()
-            return json.loads(json_str)
-        except:
-            print("🔴 无法解析大模型输出")
-            return {}
+    STREAM_TIMEOUT = 30


 # ================== 知识图谱管理 ==================
 class KnowledgeManager:
-    """严格匹配型知识图谱管理器"""
-
    def __init__(self):
        self.graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
+        self._verify_connection()
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()
+        print("✅ 知识库加载完成")
+        print(f"有效知识点({len(self.knowledge_map)}个): {list(self.knowledge_map.keys())[:3]}...")
+        print(f"有效素养点({len(self.literacy_map)}个): {list(self.literacy_map.keys())[:3]}...")
+
+    def _verify_connection(self):
+        """Check the Neo4j connection by running a trivial query.
+
+        Prints a success message when the query runs. On any exception the
+        failure message is printed and the same exception is re-raised.
+        """
+        try:
+            # Cheapest possible round trip: no labels or data are touched.
+            self.graph.run("RETURN 1 AS test")
+            print("✅ Neo4j连接验证成功")
+        except Exception as e:
+            print(f"❌ Neo4j连接失败: {str(e)}")
+            # Re-raise so the caller does not continue with an unusable graph.
+            raise

    def _load_knowledge(self) -> Dict[str, str]:
-        """加载知识点映射（id -> name）"""
-        return {rec['n.id']: rec['n.name']
-                for rec in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}
+        """安全加载知识点"""
+        records = self.graph.run("MATCH (n:KnowledgePoint) WHERE n.name IS NOT NULL RETURN n.id, n.name").data()
+        return {rec['n.name'].strip(): rec['n.id'] for rec in records if rec['n.name'] and rec['n.name'].strip()}

    def _load_literacy(self) -> Dict[str, str]:
-        """加载素养点映射（value -> title）"""
-        return {rec['n.value']: rec['n.title']
-                for rec in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
+        """安全加载素养点"""
+        records = self.graph.run("MATCH (n:LiteracyNode) WHERE n.title IS NOT NULL RETURN n.value, n.title").data()
+        return {rec['n.title'].strip(): rec['n.value'] for rec in records if rec['n.title'] and rec['n.title'].strip()}

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
-        """使用参数化查询解决转义问题"""
+        """增强存储方法"""
        tx = self.graph.begin()
        try:
-            # 使用参数化查询避免转义问题
-            tx.run(
-                "MERGE (q:Question {id: $question_id}) "
-                "SET q.content = $content",
-                {
-                    "question_id": question_id,
-                    "content": content
-                }
-            )
-
-            # 关联知识点（参数化版本）
-            for kp_name in knowledge:
-                if kp_id := next((k for k, v in self.knowledge_map.items() if v == kp_name), None):
-                    tx.run(
-                        "MATCH (kp:KnowledgePoint {id: $kp_id}) "
-                        "MATCH (q:Question {id: $qid}) "
-                        "MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)",
-                        {"kp_id": kp_id, "qid": question_id}
-                    )
-
-            # 关联素养点（参数化版本）
-            for lit_name in literacy:
-                if lit_id := next((k for k, v in self.literacy_map.items() if v == lit_name), None):
-                    tx.run(
-                        "MATCH (lp:LiteracyNode {value: $lit_id}) "
-                        "MATCH (q:Question {id: $qid}) "
-                        "MERGE (q)-[:DEVELOPS_LITERACY]->(lp)",
-                        {"lit_id": lit_id, "qid": question_id}
-                    )
+            # 创建/更新题目节点
+            tx.run("""
+                MERGE (q:Question {id: $id})
+                SET q.content = $content, q.updateTime = timestamp()
+                """, {"id": question_id, "content": content})
+
+            # 关联知识点
+            valid_kp = []
+            for name in knowledge:
+                clean_name = name.strip()
+                if kp_id := self.knowledge_map.get(clean_name):
+                    tx.run("""
+                        MATCH (q:Question {id: $qid}), (kp:KnowledgePoint {id: $kpid})
+                        MERGE (q)-[r:REQUIRES_KNOWLEDGE]->(kp)
+                        SET r.lastUsed = timestamp()
+                        """, {"qid": question_id, "kpid": kp_id})
+                    valid_kp.append(clean_name)
+
+            # 关联素养点
+            valid_lp = []
+            for title in literacy:
+                clean_title = title.strip()
+                if lit_id := self.literacy_map.get(clean_title):
+                    tx.run("""
+                        MATCH (q:Question {id: $qid}), (lp:LiteracyNode {value: $lpid})
+                        MERGE (q)-[r:DEVELOPS_LITERACY]->(lp)
+                        SET r.lastUsed = timestamp()
+                        """, {"qid": question_id, "lpid": lit_id})
+                    valid_lp.append(clean_title)

            self.graph.commit(tx)
-            print("✅ 数据存储成功")
+            print(f"✅ 存储成功 - 知识点: {valid_kp}, 素养点: {valid_lp}")
        except Exception as e:
            self.graph.rollback(tx)
            print(f"❌ 存储失败: {str(e)}")


-# ================== 核心分析引擎 ==================
-class ProblemAnalyzer:
-    """纯大模型分析引擎"""
+# ================== 大模型客户端 ==================
+class StreamLLMClient:
+    """Streaming chat-completions client that tags a math problem.
+
+    The prompt lists the knowledge-point and literacy-point names held by the
+    supplied knowledge manager, and the model is asked to answer with a JSON
+    object that only uses names from those lists.
+    """
+
+    def __init__(self, kg: KnowledgeManager):
+        """Store the knowledge manager and prepare the request headers.
+
+        Args:
+            kg: Object exposing ``knowledge_map`` and ``literacy_map`` dicts
+                keyed by name; only the keys are read by this class.
+        """
+        self.kg = kg
+        self.base_url = Config.MODEL_API_URL
+        self.headers = {
+            "Authorization": f"Bearer {Config.MODEL_API_KEY}",
+            "Content-Type": "application/json",
+            "Accept": "application/json"
+        }

+    def analyze_problem(self, content: str) -> dict:
+        """Send the problem to the model and return the parsed JSON answer.
+
+        Args:
+            content: Raw problem text.
+
+        Returns:
+            The decoded JSON object, or ``{}`` when the request, the stream
+            or the JSON extraction fails (the failure is printed).
+        """
+        try:
+            prompt = self._build_prompt(content)
+            response = self._send_request(prompt)
+            try:
+                return self._process_stream(response)
+            finally:
+                # The stream loop may stop at "[DONE]" before the body is
+                # exhausted; close explicitly so the connection is released.
+                response.close()
+        except Exception as e:
+            print(f"🔴 分析失败: {str(e)}")
+            return {}
+
+    def _build_prompt(self, content: str) -> str:
+        """Build the prompt that constrains the model to the known names.
+
+        The names are rendered as JSON arrays. Interpolating ``dict.keys()``
+        directly would put the ``dict_keys([...])`` repr into the prompt.
+        """
+        knowledge_names = json.dumps(list(self.kg.knowledge_map), ensure_ascii=False)
+        literacy_names = json.dumps(list(self.kg.literacy_map), ensure_ascii=False)
+        return f"""请严格按以下要求分析数学题目：
+1. 知识点必须完全匹配以下列表中的名称（不要新增或修改）：
+{knowledge_names}
+
+2. 素养点必须完全匹配以下列表中的名称：
+{literacy_names}
+
+3. 返回严格JSON格式：
+{{
+    "problem_types": ["题型"],
+    "knowledge_points": ["匹配的知识点"],
+    "literacy_points": ["匹配的素养点"]
+}}
+
+题目内容：{content}"""
+
+    def _send_request(self, prompt: str):
+        """POST the prompt as a streaming chat-completions request.
+
+        Returns:
+            The open streaming response.
+
+        Raises:
+            requests.exceptions.RequestException: On network failure, timeout
+                or a non-2xx status code.
+        """
+        response = requests.post(
+            f"{self.base_url}/chat/completions",
+            headers=self.headers,
+            json={
+                "model": Config.MODEL_NAME,
+                "messages": [{"role": "user", "content": prompt}],
+                "temperature": 0.1,
+                "stream": True
+            },
+            timeout=Config.STREAM_TIMEOUT,
+            stream=True
+        )
+        try:
+            # Fail here on 4xx/5xx instead of parsing an error body as a stream.
+            response.raise_for_status()
+        except Exception:
+            response.close()
+            raise
+        return response
+
+    def _process_stream(self, response) -> dict:
+        """Collect the streamed ``data:`` chunks and parse the JSON they contain.
+
+        Args:
+            response: Object whose ``iter_lines()`` yields the raw byte lines
+                of the event stream.
+
+        Returns:
+            The decoded JSON object, or ``{}`` when no valid JSON is found.
+        """
+        full_content = ""
+        try:
+            pieces = []
+            for raw_line in response.iter_lines():
+                if not raw_line:
+                    continue
+                line = raw_line.decode('utf-8').strip()
+                if not line.startswith('data:'):
+                    continue
+                payload = line[5:].strip()
+                if payload == "[DONE]":
+                    break
+                try:
+                    piece = json.loads(payload)['choices'][0]['delta'].get('content')
+                except (json.JSONDecodeError, KeyError, IndexError,
+                        TypeError, AttributeError):
+                    # Malformed or non-content chunk (keep-alive, usage, ...).
+                    continue
+                if isinstance(piece, str) and piece:
+                    pieces.append(piece)
+            full_content = "".join(pieces)
+
+            # Debug log of the assembled model output.
+            print(f"原始响应内容：\n{full_content}")
+
+            # The model may wrap the JSON in prose or code fences; take the
+            # outermost brace-delimited span.
+            match = re.search(r'\{[\s\S]*\}', full_content)
+            if match is None:
+                print(f"⚠️ JSON解析失败，原始内容：{full_content}")
+                return {}
+            return json.loads(match.group())
+        except json.JSONDecodeError:
+            print(f"⚠️ JSON解析失败，原始内容：{full_content}")
+            return {}
+        except Exception as e:
+            print(f"流处理异常：{str(e)}")
+            return {}
+
+
+# ================== 核心引擎 ==================
+class ProblemAnalyzer:
    def __init__(self, content: str):
        self.original = content
        self.content = self._preprocess(content)
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
-        self.llm = StreamLLMClient()
+        self.llm = StreamLLMClient(self.kg)

    def _preprocess(self, text: str) -> str:
        """文本预处理"""
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]

-    def analyze(self) -> dict:
-        """纯大模型分析"""
-        result = self.llm.analyze_problem(self.original)
-        return {
-            "problem_id": self.question_id,
-            "problem_types": result.get('problem_types', [])[:3],
-            "knowledge_points": result.get('knowledge_points', [])[:2],
-            "literacy_points": result.get('literacy_points', [])[:2]
-        }
-
    def execute(self):
-        """执行完整流程"""
-        print(f"\n🔍 开始分析题目：{self.original[:50]}...")
+        """执行分析流程"""
+        print(f"\n🔍 分析题目: {self.original[:50]}...")

-        analysis = self.analyze()
+        analysis = self.llm.analyze_problem(self.original)
+        if not analysis:
+            print("⚠️ 大模型分析失败")
+            return

-        print("\n📊 大模型分析报告：")
-        print(f"  题型识别：{analysis.get('problem_types', [])}")
-        print(f"  推荐知识点：{analysis.get('knowledge_points', [])}")
-        print(f"  关联素养点：{analysis.get('literacy_points', [])}")
+        print("\n📊 分析结果:")
+        print(f"  题型: {analysis.get('problem_types', [])}")
+        print(f"  知识点: {analysis.get('knowledge_points', [])}")
+        print(f"  素养点: {analysis.get('literacy_points', [])}")

-        # 存储到知识图谱
        self.kg.store_analysis(
-            question_id=analysis['problem_id'],
+            question_id=self.question_id,
            content=self.content,
            knowledge=analysis.get('knowledge_points', []),
            literacy=analysis.get('literacy_points', [])
        )


-# ================== 测试用例 ==================
+# ================== 查询接口 ==================
+def query_question(question_id: str):
+    """Print the stored content and linked names for one question.
+
+    Opens its own Neo4j connection, looks up the ``Question`` node with the
+    given id and prints its content together with the names of the linked
+    knowledge points and literacy points. Errors are printed, not raised.
+
+    Args:
+        question_id: Value of the ``id`` property of the ``Question`` node.
+    """
+    try:
+        graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
+        # Two OPTIONAL MATCH clauses yield one row per (kp, lp) pair, so the
+        # aggregates need DISTINCT or every name is repeated.
+        result = graph.run("""
+            MATCH (q:Question {id: $id})
+            OPTIONAL MATCH (q)-[:REQUIRES_KNOWLEDGE]->(kp)
+            OPTIONAL MATCH (q)-[:DEVELOPS_LITERACY]->(lp)
+            RETURN q.content AS content,
+                   collect(DISTINCT kp.name) AS knowledge,
+                   collect(DISTINCT lp.title) AS literacy
+        """, id=question_id).data()
+
+        if result:
+            data = result[0]
+            print(f"\n🔍 查询结果（ID: {question_id}）")
+            print(f"内容: {data['content']}")
+            print(f"知识点: {data['knowledge']}")
+            print(f"素养点: {data['literacy']}")
+        else:
+            print("未找到相关题目")
+    except Exception as e:
+        print(f"查询错误: {str(e)}")
+
+
+# ================== 测试执行 ==================
 if __name__ == '__main__':
    test_cases = [
        "小明用50元买了3本笔记本，每本8元，还剩多少钱？",
        "甲乙两车相距300公里，甲车速度60km/h，乙车40km/h，几小时后相遇？"
    ]

-    for question in test_cases:
-        print("\n" + "=" * 80)
-        print(f"📚 处理题目：{question}")
-        analyzer = ProblemAnalyzer(question)
+    for q in test_cases:
+        print("\n" + "=" * 60)
+        analyzer = ProblemAnalyzer(q)
        analyzer.execute()

-'''
-// 查询题目关联信息
-MATCH (q:Question {id: '6fff79108736'})
-OPTIONAL MATCH (q)-[:REQUIRES_KNOWLEDGE]->(kp:KnowledgePoint)
-OPTIONAL MATCH (q)-[:DEVELOPS_LITERACY]->(lp:LiteracyNode)
-RETURN 
-  q.content AS 题目内容,
-  COLLECT(DISTINCT kp.name) AS 关联知识点,
-  COLLECT(DISTINCT lp.title) AS 关联素养点
-'''
+    query_question('6fff79108736')
--- a/AI/Neo4j/pycache/N2_ReadXksy.cpython-310.pyc
+++ b/AI/Neo4j/pycache/N2_ReadXksy.cpython-310.pyc