main
黄海 5 months ago
parent e37db2d656
commit 81d2539f21

@ -1,235 +1,256 @@
# -*- coding: utf-8 -*-
"""
数学题目分析系统 v6.1严格匹配版
功能特性
1. 纯大模型分析
2. 流式响应处理
3. 严格匹配已有节点
4. 不创建新知识点/素养点
数学题目分析系统 v6.3稳定流式处理版
"""
import re
import json
import hashlib
from typing import Dict, List
import requests
from py2neo import Graph
from Config import *
from typing import Dict, List
from Config import NEO4J_URI, NEO4J_AUTH, MODEL_API_URL, MODEL_API_KEY, MODEL_NAME
# ================== 配置 ==================
# ================== 配置 ==================
class Config:
    """Runtime settings used by the classes in this file.

    The connection and model values are re-exported from the names imported
    from the project's ``Config`` module at the top of the file.
    """

    # Neo4j connection settings
    NEO4J_URI = NEO4J_URI
    NEO4J_AUTH = NEO4J_AUTH
    # LLM endpoint settings (the original note says these are an Alibaba Cloud example)
    MODEL_API_URL = MODEL_API_URL
    MODEL_API_KEY = MODEL_API_KEY
    MODEL_NAME = MODEL_NAME
    # Timeouts
    STREAM_TIMEOUT = 30  # passed as ``timeout=`` to requests.post for the streamed call
    CHUNK_TIMEOUT = 5  # per-chunk wait; not read by any code visible in this file
    # System parameters
    MAX_CONTENT_LENGTH = 500  # cap applied to the preprocessed question text
# ================== 流式大模型客户端 ==================
class StreamLLMClient:
    """LLM client that reads a streamed chat-completions response."""

    def __init__(self):
        self.base_url = Config.MODEL_API_URL
        self.headers = {
            "Authorization": f"Bearer {Config.MODEL_API_KEY}",
            "Content-Type": "application/json"
        }

    def analyze_problem(self, content: str) -> dict:
        """Ask the model to classify a math problem.

        Args:
            content: The problem text appended to the prompt.

        Returns:
            The JSON object parsed from the model output, or ``{}`` when the
            request fails or no JSON object can be extracted.
        """
        # Built line by line so the prompt text does not depend on how this
        # method happens to be indented.
        prompt = (
            "请严格按JSON格式分析数学题目\n"
            "{\n"
            '"problem_types": ["题型列表"],\n'
            '"knowledge_points": ["知识点名称(必须与数据库完全一致)"],\n'
            '"literacy_points": ["素养点名称(必须与数据库完全一致)"]\n'
            "}\n"
            f"题目{content}"
        )
        try:
            # The context manager releases the connection even if the stream
            # is abandoned part-way through.
            with requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json={
                    "model": Config.MODEL_NAME,
                    "messages": [{"role": "user", "content": prompt}],
                    "temperature": 0.2,
                    "stream": True
                },
                timeout=Config.STREAM_TIMEOUT,
                stream=True
            ) as response:
                response.raise_for_status()
                return self._process_stream(response)
        except requests.exceptions.RequestException as e:
            print(f"🌐 网络错误: {str(e)}")
            return {}
        except Exception as e:
            print(f"🔴 解析失败: {str(e)}")
            return {}

    def _process_stream(self, response) -> dict:
        """Concatenate the streamed deltas and parse the JSON object in them.

        Args:
            response: Any object whose ``iter_lines()`` yields ``bytes`` lines
                of the form ``data: <json>``.

        Returns:
            The parsed JSON object, or ``{}`` when none can be extracted.
        """
        parts = []
        for chunk in response.iter_lines():
            if not chunk:
                continue  # blank keep-alive line
            decoded_chunk = chunk.decode('utf-8')
            if not decoded_chunk.startswith("data:"):
                continue
            payload = decoded_chunk[5:].strip()
            if payload == "[DONE]":
                break  # end-of-stream sentinel, not JSON
            try:
                piece = json.loads(payload)['choices'][0]['delta'].get('content')
            except (ValueError, KeyError, IndexError, TypeError, AttributeError):
                # Malformed or unexpected event: skip it, keep the rest.
                continue
            if isinstance(piece, str):
                parts.append(piece)
        full_content = "".join(parts)

        # Greedy match from the first "{" to the last "}" so text around the
        # JSON object is ignored.
        match = re.search(r'\{.*\}', full_content, re.DOTALL)
        if match is None:
            print("🔴 无法解析大模型输出")
            return {}
        try:
            return json.loads(match.group())
        except ValueError:
            print("🔴 无法解析大模型输出")
            return {}
# NOTE(review): this module-level STREAM_TIMEOUT is not read by the code
# visible in this file, which uses Config.STREAM_TIMEOUT (also 30).
# Presumably a leftover from editing the Config class — confirm before removing.
STREAM_TIMEOUT = 30
# ================== 知识图谱管理 ==================
class KnowledgeManager:
    """Knowledge-graph manager that only links questions to existing nodes.

    Attributes:
        graph: The py2neo ``Graph`` connection.
        knowledge_map: Knowledge-point name -> ``id`` of the node.
        literacy_map: Literacy-point title -> ``value`` of the node.
    """

    def __init__(self):
        self.graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
        self._verify_connection()
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()
        print("✅ 知识库加载完成")
        print(f"有效知识点({len(self.knowledge_map)}个): {list(self.knowledge_map.keys())[:3]}...")
        print(f"有效素养点({len(self.literacy_map)}个): {list(self.literacy_map.keys())[:3]}...")

    def _verify_connection(self):
        """Run a trivial query; re-raise whatever the driver raises on failure."""
        try:
            self.graph.run("RETURN 1 AS test")
            print("✅ Neo4j连接验证成功")
        except Exception as e:
            print(f"❌ Neo4j连接失败: {str(e)}")
            raise

    def _load_knowledge(self) -> Dict[str, str]:
        """Load the knowledge points as a stripped-name -> id mapping.

        Keyed by name because both ``store_analysis`` and the prompt builder
        look entries up by the name the model returns.
        """
        records = self.graph.run(
            "MATCH (n:KnowledgePoint) WHERE n.name IS NOT NULL RETURN n.id, n.name"
        ).data()
        return {
            rec['n.name'].strip(): rec['n.id']
            for rec in records
            if rec['n.name'] and rec['n.name'].strip()
        }

    def _load_literacy(self) -> Dict[str, str]:
        """Load the literacy points as a stripped-title -> value mapping."""
        records = self.graph.run(
            "MATCH (n:LiteracyNode) WHERE n.title IS NOT NULL RETURN n.value, n.title"
        ).data()
        return {
            rec['n.title'].strip(): rec['n.value']
            for rec in records
            if rec['n.title'] and rec['n.title'].strip()
        }

    @staticmethod
    def _match_names(names, mapping) -> List[tuple]:
        """Return ``(clean_name, node_key)`` for every name found in ``mapping``.

        Non-string items and a ``None`` list are ignored, so one malformed
        entry in the model output does not abort the whole store.
        """
        matched = []
        for name in names or []:
            if not isinstance(name, str):
                continue
            clean_name = name.strip()
            node_key = mapping.get(clean_name)
            # ``is not None`` rather than truthiness, so a key of 0 still matches.
            if node_key is not None:
                matched.append((clean_name, node_key))
        return matched

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
        """Upsert the question node and link it to the matched nodes.

        All writes use parameterized Cypher inside one transaction. On any
        error the transaction is rolled back and the error is printed; it is
        not re-raised.

        Args:
            question_id: Value of the ``Question.id`` property.
            content: Text stored in ``Question.content``.
            knowledge: Knowledge-point names; names not in ``knowledge_map``
                are skipped.
            literacy: Literacy-point titles; titles not in ``literacy_map``
                are skipped.
        """
        tx = self.graph.begin()
        try:
            # Create or update the question node.
            tx.run("""
                MERGE (q:Question {id: $id})
                SET q.content = $content, q.updateTime = timestamp()
                """, {"id": question_id, "content": content})

            # Link knowledge points.
            valid_kp = []
            for clean_name, kp_id in self._match_names(knowledge, self.knowledge_map):
                tx.run("""
                    MATCH (q:Question {id: $qid}), (kp:KnowledgePoint {id: $kpid})
                    MERGE (q)-[r:REQUIRES_KNOWLEDGE]->(kp)
                    SET r.lastUsed = timestamp()
                    """, {"qid": question_id, "kpid": kp_id})
                valid_kp.append(clean_name)

            # Link literacy points.
            valid_lp = []
            for clean_title, lit_id in self._match_names(literacy, self.literacy_map):
                tx.run("""
                    MATCH (q:Question {id: $qid}), (lp:LiteracyNode {value: $lpid})
                    MERGE (q)-[r:DEVELOPS_LITERACY]->(lp)
                    SET r.lastUsed = timestamp()
                    """, {"qid": question_id, "lpid": lit_id})
                valid_lp.append(clean_title)

            self.graph.commit(tx)
            print(f"存储成功 - 知识点: {valid_kp}, 素养点: {valid_lp}")
        except Exception as e:
            self.graph.rollback(tx)
            print(f"❌ 存储失败: {str(e)}")
# ================== 核心分析引擎 ==================
class ProblemAnalyzer:
    """LLM-only analysis engine (placeholder).

    NOTE(review): this header has no body here; the ``ProblemAnalyzer`` class
    defined further down in this file rebinds the name. Presumably a stale
    duplicate — confirm before removing.
    """
# ================== 大模型客户端 ==================
class StreamLLMClient:
    """Streaming LLM client whose prompt lists the names loaded from the graph."""

    def __init__(self, kg: "KnowledgeManager"):
        """Keep the knowledge manager and prepare the request headers.

        Args:
            kg: Object exposing ``knowledge_map`` and ``literacy_map``; their
                keys are listed in the prompt as the allowed names.
        """
        self.kg = kg
        self.base_url = Config.MODEL_API_URL
        self.headers = {
            "Authorization": f"Bearer {Config.MODEL_API_KEY}",
            "Content-Type": "application/json",
            "Accept": "application/json"
        }

    def analyze_problem(self, content: str) -> dict:
        """Send the problem to the model and return the parsed JSON object.

        Returns ``{}`` on any failure (request error, HTTP error status, or
        unparseable output); the error is printed, not raised.
        """
        try:
            prompt = self._build_prompt(content)
            # Close the streamed response even on early exit, and fail fast on
            # HTTP error statuses instead of parsing an error body as a stream.
            with self._send_request(prompt) as response:
                response.raise_for_status()
                return self._process_stream(response)
        except Exception as e:
            print(f"🔴 分析失败: {str(e)}")
            return {}

    def _build_prompt(self, content: str) -> str:
        """Build the prompt, embedding the allowed names as JSON arrays."""
        # json.dumps gives a clean ["a", "b"] list; formatting .keys()
        # directly would embed the "dict_keys([...])" repr in the prompt.
        knowledge_names = json.dumps(list(self.kg.knowledge_map), ensure_ascii=False)
        literacy_names = json.dumps(list(self.kg.literacy_map), ensure_ascii=False)
        return "\n".join([
            "请严格按以下要求分析数学题目:",
            "1. 知识点必须完全匹配以下列表中的名称不要新增或修改",
            knowledge_names,
            "2. 素养点必须完全匹配以下列表中的名称",
            literacy_names,
            "3. 返回严格JSON格式",
            "{",
            '"problem_types": ["题型"],',
            '"knowledge_points": ["匹配的知识点"],',
            '"literacy_points": ["匹配的素养点"]',
            "}",
            f"题目内容{content}",
        ])

    def _send_request(self, prompt: str):
        """POST the streamed chat-completions request and return the response."""
        return requests.post(
            f"{self.base_url}/chat/completions",
            headers=self.headers,
            json={
                "model": Config.MODEL_NAME,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.1,
                "stream": True
            },
            timeout=Config.STREAM_TIMEOUT,
            stream=True
        )

    def _process_stream(self, response) -> dict:
        """Collect the streamed deltas and parse the JSON object they contain.

        Args:
            response: Any object whose ``iter_lines()`` yields ``bytes`` lines
                of the form ``data: <json>``, ending with ``data: [DONE]``.

        Returns:
            The parsed JSON object, or ``{}`` when the stream fails or holds
            no parseable JSON object.
        """
        parts = []
        try:
            for chunk in response.iter_lines():
                if not chunk:
                    continue
                decoded = chunk.decode('utf-8').strip()
                if not decoded.startswith('data:'):
                    continue
                json_str = decoded[5:].strip()
                if json_str == "[DONE]":
                    break
                try:
                    piece = json.loads(json_str)['choices'][0]['delta'].get('content')
                except (ValueError, KeyError, IndexError, TypeError, AttributeError):
                    # Malformed or unexpected event: skip it, keep the rest.
                    continue
                if isinstance(piece, str):
                    parts.append(piece)
        except Exception as e:
            # Errors raised while reading the stream (e.g. connection dropped).
            print(f"流处理异常:{str(e)}")
            return {}

        full_content = "".join(parts)
        # Debug log of the raw model output.
        print(f"原始响应内容:\n{full_content}")

        # Extract the JSON object, from the first "{" to the last "}".
        match = re.search(r'\{[\s\S]*\}', full_content)
        if match is None:
            print(f"⚠️ JSON解析失败原始内容{full_content}")
            return {}
        try:
            return json.loads(match.group())
        except json.JSONDecodeError:
            print(f"⚠️ JSON解析失败原始内容{full_content}")
            return {}
# ================== 核心引擎 ==================
class ProblemAnalyzer:
    """Analyze one math problem with the LLM and store the result in the graph."""

    def __init__(self, content: str):
        """Prepare the analysis of ``content``.

        The question id is the first 12 hex characters of the SHA-256 of the
        raw text, so the same text always maps to the same ``Question`` node.
        """
        self.original = content
        self.content = self._preprocess(content)
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
        # The client takes the knowledge manager so it can list valid names.
        self.llm = StreamLLMClient(self.kg)

    def _preprocess(self, text: str) -> str:
        """Drop every character matching ``[^\\w\\u4e00-\\u9fa5]`` and cap the length."""
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]

    @staticmethod
    def _as_list(value) -> list:
        """Return ``value`` if it is a list, else an empty list."""
        return value if isinstance(value, list) else []

    def analyze(self) -> dict:
        """Run the LLM analysis and return a trimmed summary.

        Returns:
            A dict with ``problem_id``, at most three ``problem_types`` and at
            most two each of ``knowledge_points`` and ``literacy_points``.
        """
        result = self.llm.analyze_problem(self.original)
        return {
            "problem_id": self.question_id,
            "problem_types": self._as_list(result.get('problem_types'))[:3],
            "knowledge_points": self._as_list(result.get('knowledge_points'))[:2],
            "literacy_points": self._as_list(result.get('literacy_points'))[:2]
        }

    def execute(self):
        """Analyze the problem, print the result and store it in the graph."""
        print(f"\n🔍 分析题目: {self.original[:50]}...")
        # One model call only; the raw result is used so an empty dict can
        # signal failure.
        analysis = self.llm.analyze_problem(self.original)
        if not analysis:
            print("⚠️ 大模型分析失败")
            return
        print("\n📊 分析结果:")
        print(f" 题型: {analysis.get('problem_types', [])}")
        print(f" 知识点: {analysis.get('knowledge_points', [])}")
        print(f" 素养点: {analysis.get('literacy_points', [])}")
        # Store to the knowledge graph.
        self.kg.store_analysis(
            question_id=self.question_id,
            content=self.content,
            knowledge=self._as_list(analysis.get('knowledge_points')),
            literacy=self._as_list(analysis.get('literacy_points'))
        )
# ================== 测试用例 ==================
# ================== 查询接口 ==================
def query_question(question_id: str):
    """Print the stored content and linked points of one question.

    Opens its own Neo4j connection. Any error is printed, not raised.

    Args:
        question_id: Value of the ``Question.id`` property to look up.
    """
    try:
        graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
        # DISTINCT is required: the two OPTIONAL MATCH clauses produce one row
        # per (knowledge, literacy) pair, so a plain collect() repeats names.
        result = graph.run("""
            MATCH (q:Question {id: $id})
            OPTIONAL MATCH (q)-[:REQUIRES_KNOWLEDGE]->(kp)
            OPTIONAL MATCH (q)-[:DEVELOPS_LITERACY]->(lp)
            RETURN q.content AS content,
            collect(DISTINCT kp.name) AS knowledge,
            collect(DISTINCT lp.title) AS literacy
            """, id=question_id).data()
        if result:
            data = result[0]
            print(f"\n🔍 查询结果ID: {question_id}")
            print(f"内容: {data['content']}")
            print(f"知识点: {data['knowledge']}")
            print(f"素养点: {data['literacy']}")
        else:
            print("未找到相关题目")
    except Exception as e:
        print(f"查询错误: {str(e)}")
# ================== 测试执行 ==================
if __name__ == '__main__':
    test_cases = [
        "小明用50元买了3本笔记本每本8元还剩多少钱",
        "甲乙两车相距300公里甲车速度60km/h乙车40km/h几小时后相遇"
    ]
    # One pass only: each ProblemAnalyzer opens a Neo4j connection and loads
    # the name maps, so an analyzer is built only when it is executed.
    for q in test_cases:
        print("\n" + "=" * 60)
        analyzer = ProblemAnalyzer(q)
        analyzer.execute()

    # Reference Cypher for inspecting a stored question by hand; kept as an
    # unused string literal, it has no runtime effect.
    '''
    // 查询题目关联信息
    MATCH (q:Question {id: '6fff79108736'})
    OPTIONAL MATCH (q)-[:REQUIRES_KNOWLEDGE]->(kp:KnowledgePoint)
    OPTIONAL MATCH (q)-[:DEVELOPS_LITERACY]->(lp:LiteracyNode)
    RETURN
    q.content AS 题目内容,
    COLLECT(DISTINCT kp.name) AS 关联知识点,
    COLLECT(DISTINCT lp.title) AS 关联素养点
    '''
    # Kept under the guard so importing this module does not touch the database.
    query_question('6fff79108736')
Loading…
Cancel
Save