main
黄海 6 months ago
parent e37db2d656
commit 81d2539f21

@@ -1,235 +1,256 @@
# -*- coding: utf-8 -*-
"""
Math problem analysis system, v6.3 (stable streaming edition)

Features:
1. Pure LLM-based analysis
2. Streaming response handling
3. Strict matching against existing graph nodes
4. Never creates new knowledge/literacy points
"""
import re
import json
import hashlib
from typing import Dict, List

import requests
from py2neo import Graph

from Config import NEO4J_URI, NEO4J_AUTH, MODEL_API_URL, MODEL_API_KEY, MODEL_NAME
# ================== Configuration ==================
class Config:
    # Neo4j settings
    NEO4J_URI = NEO4J_URI
    NEO4J_AUTH = NEO4J_AUTH

    # LLM settings (the example values follow an Aliyun-style endpoint)
    MODEL_API_URL = MODEL_API_URL
    MODEL_API_KEY = MODEL_API_KEY
    MODEL_NAME = MODEL_NAME

    # Timeouts
    STREAM_TIMEOUT = 30  # overall timeout for a streaming response (seconds)
    CHUNK_TIMEOUT = 5    # wait for a single data chunk (seconds)

    # System limits
    MAX_CONTENT_LENGTH = 500
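
# For reference, a minimal sketch of the sibling Config.py module imported
# above. All values are placeholders (the source only notes that the model
# settings follow an Aliyun-style example), not the project's real settings:
#
#   NEO4J_URI = "bolt://localhost:7687"
#   NEO4J_AUTH = ("neo4j", "your-password")
#   MODEL_API_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
#   MODEL_API_KEY = "sk-..."
#   MODEL_NAME = "qwen-plus"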
# ================== Knowledge graph management ==================
class KnowledgeManager:
    """Strict-matching knowledge graph manager."""
    def __init__(self):
        self.graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
        self._verify_connection()
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()
        print("✅ Knowledge base loaded")
        print(f"Valid knowledge points ({len(self.knowledge_map)}): {list(self.knowledge_map.keys())[:3]}...")
        print(f"Valid literacy points ({len(self.literacy_map)}): {list(self.literacy_map.keys())[:3]}...")

    def _verify_connection(self):
        """Fail fast if Neo4j is unreachable."""
        try:
            self.graph.run("RETURN 1 AS test")
            print("✅ Neo4j connection verified")
        except Exception as e:
            print(f"❌ Neo4j connection failed: {str(e)}")
            raise

    def _load_knowledge(self) -> Dict[str, str]:
        """Load knowledge points as a name -> id map, skipping empty names."""
        records = self.graph.run(
            "MATCH (n:KnowledgePoint) WHERE n.name IS NOT NULL RETURN n.id, n.name"
        ).data()
        return {rec['n.name'].strip(): rec['n.id']
                for rec in records if rec['n.name'] and rec['n.name'].strip()}

    def _load_literacy(self) -> Dict[str, str]:
        """Load literacy points as a title -> value map, skipping empty titles."""
        records = self.graph.run(
            "MATCH (n:LiteracyNode) WHERE n.title IS NOT NULL RETURN n.value, n.title"
        ).data()
        return {rec['n.title'].strip(): rec['n.value']
                for rec in records if rec['n.title'] and rec['n.title'].strip()}
    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
        """Store an analysis result; all writes go through one transaction."""
        tx = self.graph.begin()
        try:
            # Create or update the question node.
            tx.run("""
                MERGE (q:Question {id: $id})
                SET q.content = $content, q.updateTime = timestamp()
            """, {"id": question_id, "content": content})

            # Link knowledge points, keeping only exact matches.
            valid_kp = []
            for name in knowledge:
                clean_name = name.strip()
                if kp_id := self.knowledge_map.get(clean_name):
                    tx.run("""
                        MATCH (q:Question {id: $qid}), (kp:KnowledgePoint {id: $kpid})
                        MERGE (q)-[r:REQUIRES_KNOWLEDGE]->(kp)
                        SET r.lastUsed = timestamp()
                    """, {"qid": question_id, "kpid": kp_id})
                    valid_kp.append(clean_name)

            # Link literacy points the same way.
            valid_lp = []
            for title in literacy:
                clean_title = title.strip()
                if lit_id := self.literacy_map.get(clean_title):
                    tx.run("""
                        MATCH (q:Question {id: $qid}), (lp:LiteracyNode {value: $lpid})
                        MERGE (q)-[r:DEVELOPS_LITERACY]->(lp)
                        SET r.lastUsed = timestamp()
                    """, {"qid": question_id, "lpid": lit_id})
                    valid_lp.append(clean_title)

            self.graph.commit(tx)
            print(f"Stored successfully - knowledge points: {valid_kp}, literacy points: {valid_lp}")
        except Exception as e:
            self.graph.rollback(tx)
            print(f"❌ Storage failed: {str(e)}")
# ================== LLM client ==================
class StreamLLMClient:
    """Streaming LLM client constrained to known graph nodes."""
    def __init__(self, kg: KnowledgeManager):
        self.kg = kg
        self.base_url = Config.MODEL_API_URL
        self.headers = {
            "Authorization": f"Bearer {Config.MODEL_API_KEY}",
            "Content-Type": "application/json",
            "Accept": "application/json"
        }

    def analyze_problem(self, content: str) -> dict:
        """Streaming analysis with error handling."""
        try:
            prompt = self._build_prompt(content)
            response = self._send_request(prompt)
            return self._process_stream(response)
        except Exception as e:
            print(f"🔴 Analysis failed: {str(e)}")
            return {}

    def _build_prompt(self, content: str) -> str:
        """Build a prompt constrained to the known node names."""
        # Join the names explicitly so the prompt does not leak a
        # dict_keys(...) repr into the model input.
        knowledge_names = ", ".join(self.kg.knowledge_map.keys())
        literacy_names = ", ".join(self.kg.literacy_map.keys())
        return f"""Analyze the math problem under these strict rules:
1. Knowledge points must exactly match names from this list (no additions or edits):
{knowledge_names}
2. Literacy points must exactly match names from this list:
{literacy_names}
3. Return strict JSON:
{{
    "problem_types": ["problem types"],
    "knowledge_points": ["matched knowledge points"],
    "literacy_points": ["matched literacy points"]
}}
Problem: {content}"""

    def _send_request(self, prompt: str):
        """Send the streaming chat-completions request."""
        return requests.post(
            f"{self.base_url}/chat/completions",
            headers=self.headers,
            json={
                "model": Config.MODEL_NAME,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": 0.1,
                "stream": True
            },
            timeout=Config.STREAM_TIMEOUT,
            stream=True
        )

    def _process_stream(self, response) -> dict:
        """Accumulate streamed deltas, then extract the JSON payload."""
        full_content = ""
        try:
            for chunk in response.iter_lines():
                if not chunk:
                    continue
                decoded = chunk.decode('utf-8').strip()
                if not decoded.startswith('data:'):
                    continue
                json_str = decoded[5:].strip()
                if json_str == "[DONE]":
                    break
                try:
                    data = json.loads(json_str)
                    if content := data['choices'][0]['delta'].get('content'):
                        full_content += content
                except (KeyError, IndexError, json.JSONDecodeError):
                    continue

            # Debug log of the raw accumulated text.
            print(f"Raw response content:\n{full_content}")

            # Extract the first JSON object from the accumulated text.
            match = re.search(r'\{[\s\S]*\}', full_content)
            if not match:
                print(f"⚠️ No JSON object found in: {full_content}")
                return {}
            return json.loads(match.group())
        except json.JSONDecodeError:
            print(f"⚠️ JSON parsing failed, raw content: {full_content}")
            return {}
        except Exception as e:
            print(f"Stream processing error: {str(e)}")
            return {}
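
# --- Illustration (not part of the pipeline): the SSE wire format -----------
# _process_stream() expects OpenAI-style server-sent events: one "data:" line
# per delta, terminated by "data: [DONE]". The payloads below are invented to
# show the accumulate-then-parse step offline, with no network involved.
def _demo_sse_parsing():
    chunks = [
        b'data: {"choices": [{"delta": {"content": "{\\"problem_types\\": "}}]}',
        b'data: {"choices": [{"delta": {"content": "[\\"word problem\\"]}"}}]}',
        b'data: [DONE]',
    ]
    full = ""
    for chunk in chunks:
        decoded = chunk.decode('utf-8').strip()
        if decoded.startswith('data:'):
            payload = decoded[5:].strip()
            if payload == "[DONE]":
                break
            full += json.loads(payload)['choices'][0]['delta'].get('content', '')
    return json.loads(re.search(r'\{[\s\S]*\}', full).group())
    # -> {'problem_types': ['word problem']}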
# ================== Core engine ==================
class ProblemAnalyzer:
    def __init__(self, content: str):
        self.original = content
        self.content = self._preprocess(content)
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
        self.llm = StreamLLMClient(self.kg)

    def _preprocess(self, text: str) -> str:
        """Keep only word characters and CJK, then truncate."""
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]
    def execute(self):
        """Run the full analyze-and-store flow."""
        print(f"\n🔍 Analyzing problem: {self.original[:50]}...")
        analysis = self.llm.analyze_problem(self.original)
        if not analysis:
            print("⚠️ LLM analysis failed")
            return

        print("\n📊 Analysis result:")
        print(f"  Problem types: {analysis.get('problem_types', [])}")
        print(f"  Knowledge points: {analysis.get('knowledge_points', [])}")
        print(f"  Literacy points: {analysis.get('literacy_points', [])}")

        self.kg.store_analysis(
            question_id=self.question_id,
            content=self.content,
            knowledge=analysis.get('knowledge_points', []),
            literacy=analysis.get('literacy_points', [])
        )
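
# --- Illustration (not part of the pipeline): stable question ids -----------
# question_id hashes the raw input text, so re-analyzing the same problem
# updates the same Question node instead of creating a duplicate.
def _demo_question_id():
    text = "a sample problem statement"
    return hashlib.sha256(text.encode()).hexdigest()[:12]  # same text, same id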
# ================== Query interface ==================
def query_question(question_id: str):
    try:
        graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
        result = graph.run("""
            MATCH (q:Question {id: $id})
            OPTIONAL MATCH (q)-[:REQUIRES_KNOWLEDGE]->(kp)
            OPTIONAL MATCH (q)-[:DEVELOPS_LITERACY]->(lp)
            RETURN q.content AS content,
                   collect(DISTINCT kp.name) AS knowledge,
                   collect(DISTINCT lp.title) AS literacy
        """, id=question_id).data()
        # DISTINCT avoids cross-product duplicates from the two OPTIONAL MATCHes.

        if result:
            data = result[0]
            print(f"\n🔍 Query result (ID: {question_id})")
            print(f"Content: {data['content']}")
            print(f"Knowledge points: {data['knowledge']}")
            print(f"Literacy points: {data['literacy']}")
        else:
            print("No matching question found")
    except Exception as e:
        print(f"Query error: {str(e)}")
# ================== Test run ==================
if __name__ == '__main__':
    test_cases = [
        "Xiao Ming spent 50 yuan on 3 notebooks at 8 yuan each. How much money is left?",
        "Two cars start 300 km apart, one at 60 km/h and the other at 40 km/h. After how many hours do they meet?"
    ]

    for q in test_cases:
        print("\n" + "=" * 60)
        analyzer = ProblemAnalyzer(q)
        analyzer.execute()

    query_question('6fff79108736')
# Handy Cypher for manual checks in the Neo4j browser (same query as query_question):
'''
MATCH (q:Question {id: '6fff79108736'})
OPTIONAL MATCH (q)-[:REQUIRES_KNOWLEDGE]->(kp:KnowledgePoint)
OPTIONAL MATCH (q)-[:DEVELOPS_LITERACY]->(lp:LiteracyNode)
RETURN
    q.content AS content,
    COLLECT(DISTINCT kp.name) AS knowledge_points,
    COLLECT(DISTINCT lp.title) AS literacy_points
'''