# Branch: main — commit 81d2539f21 (parent e37db2d656), authored by 黄海.
# Diff header: @ -1,235 +1,256 @
# -*- coding: utf-8 -*-
"""
数学题目分析系统 v6.3稳定流式处理版
功能特性
1. 纯大模型分析
2. 流式响应处理
3. 严格匹配已有节点
4. 不创建新知识点/素养点
"""
import hashlib
import json
import re
from typing import Dict, List

import requests
from py2neo import Graph

from Config import NEO4J_URI, NEO4J_AUTH, MODEL_API_URL, MODEL_API_KEY, MODEL_NAME
# ================== 配置 ==================
class Config:
    """Runtime configuration, re-exported from the project-level ``Config`` module."""

    # Neo4j connection settings
    NEO4J_URI = NEO4J_URI
    NEO4J_AUTH = NEO4J_AUTH

    # LLM API settings (example values target the Aliyun-compatible endpoint)
    MODEL_API_URL = MODEL_API_URL
    MODEL_API_KEY = MODEL_API_KEY
    MODEL_NAME = MODEL_NAME

    # Maximum characters of problem text kept after preprocessing
    MAX_CONTENT_LENGTH = 500
    # Total timeout (seconds) for the streaming LLM request
    STREAM_TIMEOUT = 30
# ================== 知识图谱管理 ==================
class KnowledgeManager:
    """Strict-matching knowledge-graph manager.

    Loads the existing KnowledgePoint / LiteracyNode names from Neo4j once,
    then only ever links questions to those pre-existing nodes — it never
    creates new knowledge or literacy nodes.
    """

    def __init__(self):
        self.graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
        self._verify_connection()
        # name -> id (knowledge) and title -> value (literacy) lookup tables
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()
        print("✅ 知识库加载完成")
        print(f"有效知识点({len(self.knowledge_map)}个): {list(self.knowledge_map.keys())[:3]}...")
        print(f"有效素养点({len(self.literacy_map)}个): {list(self.literacy_map.keys())[:3]}...")

    def _verify_connection(self):
        """Fail fast (re-raise) if Neo4j is unreachable."""
        try:
            self.graph.run("RETURN 1 AS test")
            print("✅ Neo4j连接验证成功")
        except Exception as e:
            print(f"❌ Neo4j连接失败: {str(e)}")
            raise

    def _load_knowledge(self) -> Dict[str, str]:
        """Load knowledge points as {name: id}, skipping null/blank names."""
        records = self.graph.run(
            "MATCH (n:KnowledgePoint) WHERE n.name IS NOT NULL RETURN n.id, n.name"
        ).data()
        return {rec['n.name'].strip(): rec['n.id']
                for rec in records if rec['n.name'] and rec['n.name'].strip()}

    def _load_literacy(self) -> Dict[str, str]:
        """Load literacy points as {title: value}, skipping null/blank titles."""
        records = self.graph.run(
            "MATCH (n:LiteracyNode) WHERE n.title IS NOT NULL RETURN n.value, n.title"
        ).data()
        return {rec['n.title'].strip(): rec['n.value']
                for rec in records if rec['n.title'] and rec['n.title'].strip()}

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
        """Upsert the question node and link it to matched knowledge/literacy nodes.

        Names not present in the preloaded maps are silently skipped (strict
        matching).  The whole write is one transaction: rolled back on error.
        """
        tx = self.graph.begin()
        try:
            # Create/update the question node
            tx.run("""
                MERGE (q:Question {id: $id})
                SET q.content = $content, q.updateTime = timestamp()
            """, {"id": question_id, "content": content})

            # Link knowledge points (only exact matches against the map)
            valid_kp = []
            for name in knowledge:
                clean_name = name.strip()
                if kp_id := self.knowledge_map.get(clean_name):
                    tx.run("""
                        MATCH (q:Question {id: $qid}), (kp:KnowledgePoint {id: $kpid})
                        MERGE (q)-[r:REQUIRES_KNOWLEDGE]->(kp)
                        SET r.lastUsed = timestamp()
                    """, {"qid": question_id, "kpid": kp_id})
                    valid_kp.append(clean_name)

            # Link literacy points
            valid_lp = []
            for title in literacy:
                clean_title = title.strip()
                if lit_id := self.literacy_map.get(clean_title):
                    tx.run("""
                        MATCH (q:Question {id: $qid}), (lp:LiteracyNode {value: $lpid})
                        MERGE (q)-[r:DEVELOPS_LITERACY]->(lp)
                        SET r.lastUsed = timestamp()
                    """, {"qid": question_id, "lpid": lit_id})
                    valid_lp.append(clean_title)

            self.graph.commit(tx)
            print(f"✅ 存储成功 - 知识点: {valid_kp}, 素养点: {valid_lp}")
        except Exception as e:
            self.graph.rollback(tx)
            print(f"❌ 存储失败: {str(e)}")
# ================== 大模型客户端 ==================
class StreamLLMClient:
def __init__(self, kg: KnowledgeManager):
self.kg = kg
self.base_url = Config.MODEL_API_URL self.base_url = Config.MODEL_API_URL
self.headers = { self.headers = {
"Authorization": f"Bearer {Config.MODEL_API_KEY}", "Authorization": f"Bearer {Config.MODEL_API_KEY}",
"Content-Type": "application/json" "Content-Type": "application/json",
"Accept": "application/json"
} }
def analyze_problem(self, content: str) -> dict: def analyze_problem(self, content: str) -> dict:
"""流式响应分析""" """增强的流式分析"""
try: try:
response = requests.post( prompt = self._build_prompt(content)
response = self._send_request(prompt)
return self._process_stream(response)
except Exception as e:
print(f"🔴 分析失败: {str(e)}")
return {}
def _build_prompt(self, content: str) -> str:
"""构建精准提示词"""
return f"""请严格按以下要求分析数学题目:
1. 知识点必须完全匹配以下列表中的名称不要新增或修改
{self.kg.knowledge_map.keys()}
2. 素养点必须完全匹配以下列表中的名称
{self.kg.literacy_map.keys()}
3. 返回严格JSON格式
{{
"problem_types": ["题型"],
"knowledge_points": ["匹配的知识点"],
"literacy_points": ["匹配的素养点"]
}}
题目内容{content}"""
def _send_request(self, prompt: str):
"""发送API请求"""
return requests.post(
f"{self.base_url}/chat/completions", f"{self.base_url}/chat/completions",
headers=self.headers, headers=self.headers,
json={ json={
"model": Config.MODEL_NAME, "model": Config.MODEL_NAME,
"messages": [{ "messages": [{"role": "user", "content": prompt}],
"role": "user", "temperature": 0.1,
"content": f"""请严格按JSON格式分析数学题目
{{
"problem_types": ["题型列表"],
"knowledge_points": ["知识点名称(必须与数据库完全一致)"],
"literacy_points": ["素养点名称(必须与数据库完全一致)"]
}}
题目{content}"""
}],
"temperature": 0.2,
"stream": True "stream": True
}, },
timeout=Config.STREAM_TIMEOUT, timeout=Config.STREAM_TIMEOUT,
stream=True stream=True
) )
response.raise_for_status()
return self._process_stream(response)
except requests.exceptions.RequestException as e:
print(f"🌐 网络错误: {str(e)}")
return {}
except Exception as e:
print(f"🔴 解析失败: {str(e)}")
return {}
def _process_stream(self, response) -> dict: def _process_stream(self, response) -> dict:
"""处理流式响应""" """可靠的流式处理"""
full_content = "" full_content = ""
try:
for chunk in response.iter_lines(): for chunk in response.iter_lines():
if chunk: if chunk:
decoded_chunk = chunk.decode('utf-8') decoded = chunk.decode('utf-8').strip()
if decoded_chunk.startswith("data:"): if decoded.startswith('data:'):
json_str = decoded[5:].strip()
if json_str == "[DONE]":
break
try: try:
chunk_data = json.loads(decoded_chunk[5:]) data = json.loads(json_str)
content = chunk_data['choices'][0]['delta'].get('content', '') if content := data['choices'][0]['delta'].get('content'):
full_content += content full_content += content
except: except:
continue continue
try: # 调试日志
json_str = re.search(r'\{.*\}', full_content, re.DOTALL).group() print(f"原始响应内容:\n{full_content}")
# 提取有效JSON
json_str = re.search(r'\{[\s\S]*\}', full_content).group()
return json.loads(json_str) return json.loads(json_str)
except: except json.JSONDecodeError:
print("🔴 无法解析大模型输出") print(f"⚠️ JSON解析失败原始内容{full_content}")
return {} return {}
# ================== 知识图谱管理 ==================
class KnowledgeManager:
"""严格匹配型知识图谱管理器"""
def __init__(self):
self.graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
self.knowledge_map = self._load_knowledge()
self.literacy_map = self._load_literacy()
def _load_knowledge(self) -> Dict[str, str]:
"""加载知识点映射id -> name"""
return {rec['n.id']: rec['n.name']
for rec in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}
def _load_literacy(self) -> Dict[str, str]:
"""加载素养点映射value -> title"""
return {rec['n.value']: rec['n.title']
for rec in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
def store_analysis(self, question_id: str, content: str,
knowledge: List[str], literacy: List[str]):
"""使用参数化查询解决转义问题"""
tx = self.graph.begin()
try:
# 使用参数化查询避免转义问题
tx.run(
"MERGE (q:Question {id: $question_id}) "
"SET q.content = $content",
{
"question_id": question_id,
"content": content
}
)
# 关联知识点(参数化版本)
for kp_name in knowledge:
if kp_id := next((k for k, v in self.knowledge_map.items() if v == kp_name), None):
tx.run(
"MATCH (kp:KnowledgePoint {id: $kp_id}) "
"MATCH (q:Question {id: $qid}) "
"MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)",
{"kp_id": kp_id, "qid": question_id}
)
# 关联素养点(参数化版本)
for lit_name in literacy:
if lit_id := next((k for k, v in self.literacy_map.items() if v == lit_name), None):
tx.run(
"MATCH (lp:LiteracyNode {value: $lit_id}) "
"MATCH (q:Question {id: $qid}) "
"MERGE (q)-[:DEVELOPS_LITERACY]->(lp)",
{"lit_id": lit_id, "qid": question_id}
)
self.graph.commit(tx)
print("✅ 数据存储成功")
except Exception as e: except Exception as e:
self.graph.rollback(tx) print(f"流处理异常:{str(e)}")
print(f"❌ 存储失败: {str(e)}") return {}
# ================== 核心引擎 ==================
class ProblemAnalyzer:
    """End-to-end pipeline: preprocess -> LLM analysis -> graph storage."""

    def __init__(self, content: str):
        self.original = content
        self.content = self._preprocess(content)
        # Stable id derived from the raw content (first 12 hex chars of SHA-256)
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
        self.llm = StreamLLMClient(self.kg)

    def _preprocess(self, text: str) -> str:
        """Strip everything but word chars and CJK, then truncate to the limit."""
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]

    def execute(self):
        """Run the analysis and persist the matched points to the graph."""
        print(f"\n🔍 分析题目: {self.original[:50]}...")
        analysis = self.llm.analyze_problem(self.original)
        if not analysis:
            print("⚠️ 大模型分析失败")
            return

        print("\n📊 分析结果:")
        print(f" 题型: {analysis.get('problem_types', [])}")
        print(f" 知识点: {analysis.get('knowledge_points', [])}")
        print(f" 素养点: {analysis.get('literacy_points', [])}")

        self.kg.store_analysis(
            question_id=self.question_id,
            content=self.content,
            knowledge=analysis.get('knowledge_points', []),
            literacy=analysis.get('literacy_points', [])
        )
# ================== 查询接口 ==================
def query_question(question_id: str):
    """Print a question's stored content plus its linked knowledge/literacy names.

    Read-only helper; connection or query errors are caught and reported.
    """
    try:
        graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
        result = graph.run("""
            MATCH (q:Question {id: $id})
            OPTIONAL MATCH (q)-[:REQUIRES_KNOWLEDGE]->(kp)
            OPTIONAL MATCH (q)-[:DEVELOPS_LITERACY]->(lp)
            RETURN q.content AS content,
                   collect(DISTINCT kp.name) AS knowledge,
                   collect(DISTINCT lp.title) AS literacy
        """, id=question_id).data()
        # DISTINCT is required: two OPTIONAL MATCHes produce a row per
        # (kp, lp) pair, so plain collect() would duplicate every name.
        if result:
            data = result[0]
            print(f"\n🔍 查询结果ID: {question_id}")
            print(f"内容: {data['content']}")
            print(f"知识点: {data['knowledge']}")
            print(f"素养点: {data['literacy']}")
        else:
            print("未找到相关题目")
    except Exception as e:
        print(f"查询错误: {str(e)}")
# ================== 测试执行 ==================
if __name__ == '__main__':
    # Smoke-test cases: analyze each, then query one known stored question id.
    test_cases = [
        "小明用50元买了3本笔记本每本8元还剩多少钱",
        "甲乙两车相距300公里甲车速度60km/h乙车40km/h几小时后相遇"
    ]
    for q in test_cases:
        print("\n" + "=" * 60)
        analyzer = ProblemAnalyzer(q)
        analyzer.execute()
    query_question('6fff79108736')
Loading…
Cancel
Save