You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

274 lines
9.6 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
"""
数学题目分析系统 v5.0(离线可用版)
功能特性:
1. 本地规则引擎为主 + 大模型增强(可选)
2. 自动Neo4j数据清洗
3. 完善的错误处理
4. 详细的运行日志
"""
from Config import *
import hashlib
import json
import re
from typing import Dict, List
import jieba
import requests
from py2neo import Graph
from Config import *
# Eagerly load the jieba dictionary at import time so the first cut() call is fast.
jieba.initialize()
# ================== 配置区 ==================
class Config:
    """Tunable runtime settings for the analyzer."""

    # LLM integration (remote enhancement of the local rule engine)
    LLM_ENABLED = True       # flip to False to run fully offline
    LLM_TIMEOUT = 8          # per-request timeout, seconds
    LLM_MAX_RETRIES = 2      # HTTP attempts before giving up

    # General system limits
    MAX_CONTENT_LENGTH = 500  # max characters kept from the input text
# ================== 本地知识库 ==================
class LocalKnowledgeBase:
    """Offline rule engine: tags a problem by keyword overlap with topic rules.

    A topic fires when at least MATCH_THRESHOLD of its keywords appear among
    the jieba-segmented tokens of the input text.
    """

    # NOTE(review): the 'arithmetic' keyword list originally held three more
    # entries that arrived as empty strings (apparently stripped by an
    # encoding / "ambiguous Unicode" sanitizer) — restore the lost keywords
    # from the project's source of truth. The empty strings are removed here
    # because jieba never yields '' and they only polluted the keyword set.
    RULES = {
        'arithmetic': {
            'keywords': ['还剩', '单价', '总价'],
            'knowledge': ['四则运算应用(购物问题)'],
            'literacy': ['数感培养']
        },
        'travel': {
            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
            'knowledge': ['相遇问题解决方案'],
            'literacy': ['空间观念']
        },
        'work': {
            'keywords': ['合作', '效率', '工期', '完成', '单独'],
            'knowledge': ['工程合作效率计算'],
            'literacy': ['模型思想']
        },
        'geometry': {
            'keywords': ['面积', '周长', '体积', '平方', '立方'],
            'knowledge': ['几何图形面积计算'],
            'literacy': ['空间观念']
        },
        'ratio': {
            'keywords': ['百分比', '浓度', '稀释', '配比'],
            'knowledge': ['浓度问题配比计算'],
            'literacy': ['数据分析']
        }
    }

    # Minimum number of keyword hits for a rule to fire.
    MATCH_THRESHOLD = 2

    @classmethod
    def analyze(cls, content: str) -> dict:
        """Match *content* against every rule and collect the hits.

        Args:
            content: raw or preprocessed problem text.
        Returns:
            dict with 'problem_types', 'knowledge_points', 'literacy_points'
            lists (all empty when nothing matches).
        """
        result = {
            'problem_types': [],
            'knowledge_points': [],
            'literacy_points': []
        }
        words = set(jieba.cut(content))
        for ptype, rule in cls.RULES.items():
            if len(words & set(rule['keywords'])) >= cls.MATCH_THRESHOLD:
                result['problem_types'].append(ptype)
                result['knowledge_points'].extend(rule['knowledge'])
                result['literacy_points'].extend(rule['literacy'])
        return result
# ================== 大模型客户端 ==================
class LLMClient:
    """Optional remote-LLM client with retry and JSON-extraction parsing."""

    def __init__(self):
        self.enabled = Config.LLM_ENABLED
        self.base_url = MODEL_API_URL
        self.headers = {"Authorization": f"Bearer {MODEL_API_KEY}"}

    def analyze_problem(self, content: str) -> dict:
        """Ask the model to analyze *content*.

        Returns {} when the client is disabled or every retry fails, so
        callers can merge the result unconditionally.
        """
        if not self.enabled:
            return {}
        for attempt in range(Config.LLM_MAX_RETRIES):
            try:
                response = requests.post(
                    f"{self.base_url}/chat/completions",
                    headers=self.headers,
                    json={
                        "model": MODEL_NAME,
                        "messages": [{
                            "role": "user",
                            "content": f"分析数学题目:{content}"
                        }],
                        "temperature": 0.3
                    },
                    timeout=Config.LLM_TIMEOUT
                )
                response.raise_for_status()
                return self._parse_response(response.json())
            except requests.exceptions.RequestException as e:
                # BUG FIX: the original f-string was missing the closing
                # parenthesis after the retry counter.
                print(f"🌐 网络错误(尝试 {attempt + 1}/{Config.LLM_MAX_RETRIES}): {str(e)}")
        return {}

    def _parse_response(self, response: dict) -> dict:
        """Extract the first {...} JSON object from the model reply.

        Returns {} on any shape/parse failure; a missing JSON object is
        handled explicitly instead of via AttributeError on a None match.
        """
        try:
            content = response['choices'][0]['message']['content']
            match = re.search(r'\{.*\}', content, re.DOTALL)
            if match is None:
                return {}
            return json.loads(match.group())
        except Exception as e:
            print(f"🔴 解析失败: {str(e)}")
            return {}
# ================== 知识图谱管理 ==================
class KnowledgeManager:
    """Neo4j-backed store for questions and their knowledge/literacy links.

    On construction it connects, purges test/invalid nodes, and caches the
    id->name lookup maps for knowledge and literacy nodes.
    """

    def __init__(self):
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
        self._clean_data()
        self.knowledge_map = self._load_knowledge()  # kp id -> kp name
        self.literacy_map = self._load_literacy()    # literacy value -> title

    def _clean_data(self):
        """Best-effort cleanup of test fixtures.

        NOTE(review): ``n.id IS NULL`` matches every node lacking an ``id``
        property — including LiteracyNode, which is keyed by ``value`` below.
        Confirm this does not wipe literacy data before relying on it.
        """
        self.graph.run("""
            MATCH (n)
            WHERE n.name CONTAINS '测试' OR n.id IS NULL
            DETACH DELETE n
        """)

    def _load_knowledge(self) -> Dict[str, str]:
        """Load KnowledgePoint nodes as an id -> name map."""
        return {rec['n.id']: rec['n.name']
                for rec in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}

    def _load_literacy(self) -> Dict[str, str]:
        """Load LiteracyNode nodes as a value -> title map."""
        return {rec['n.value']: rec['n.title']
                for rec in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
        """Upsert the question node and link its points in one transaction.

        SECURITY FIX: the original interpolated user text into Cypher via
        f-strings (injection-prone, and the manual quote escaping was
        incomplete); all values now travel as query parameters.
        """
        tx = self.graph.begin()
        try:
            tx.run(
                "MERGE (q:Question {id: $qid}) SET q.content = $content",
                qid=question_id, content=content,
            )
            # Link knowledge points (names are mapped back to graph ids).
            for kp_name in knowledge:
                if kp_id := next((k for k, v in self.knowledge_map.items() if v == kp_name), None):
                    tx.run(
                        """
                        MERGE (kp:KnowledgePoint {id: $kid})
                        WITH kp
                        MATCH (q:Question {id: $qid})
                        MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)
                        """,
                        kid=kp_id, qid=question_id,
                    )
            # Link literacy points.
            for lit_name in literacy:
                if lit_id := next((k for k, v in self.literacy_map.items() if v == lit_name), None):
                    tx.run(
                        """
                        MERGE (lp:LiteracyNode {value: $lid})
                        WITH lp
                        MATCH (q:Question {id: $qid})
                        MERGE (q)-[:DEVELOPS_LITERACY]->(lp)
                        """,
                        lid=lit_id, qid=question_id,
                    )
            self.graph.commit(tx)
            print("✅ 数据存储成功")
        except Exception as e:
            self.graph.rollback(tx)
            print(f"❌ 存储失败: {str(e)}")
# ================== 核心分析引擎 ==================
class ProblemAnalyzer:
    """End-to-end pipeline: preprocess -> analyze (rules + LLM) -> persist."""

    def __init__(self, content: str):
        self.original = content
        self.content = self._preprocess(content)
        # Stable 12-hex-char id derived from the raw problem text.
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
        self.llm = LLMClient()

    def _preprocess(self, text: str) -> str:
        """Keep word chars / CJK only, capped at Config.MAX_CONTENT_LENGTH."""
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]

    @staticmethod
    def _merge(primary, secondary, limit: int) -> list:
        """Order-preserving de-duplicated union of two lists, cut to *limit*.

        BUG FIX: the original used ``list(set(a + b))[:n]`` — set iteration
        order is arbitrary, so the truncation kept arbitrary entries across
        runs. Local rule-engine results now deterministically take precedence
        over LLM results.
        """
        return list(dict.fromkeys(list(primary) + list(secondary)))[:limit]

    def analyze(self) -> dict:
        """Run local rules plus the optional LLM and fuse both results."""
        local_result = LocalKnowledgeBase.analyze(self.content)
        llm_result = self.llm.analyze_problem(self.original)
        return {
            "problem_id": self.question_id,
            "problem_types": self._merge(
                local_result.get('problem_types', []),
                llm_result.get('problem_types', []), 3),
            "knowledge_points": self._merge(
                local_result.get('knowledge_points', []),
                llm_result.get('knowledge_points', []), 2),
            "literacy_points": self._merge(
                local_result.get('literacy_points', []),
                llm_result.get('literacy_points', []), 2),
        }

    def execute(self):
        """Run the full flow: analyze, report to stdout, store in the graph."""
        print(f"\n🔍 开始分析题目:{self.original[:50]}...")
        analysis = self.analyze()
        print("\n📊 分析报告:")
        print(f" 题型识别:{analysis.get('problem_types', [])}")
        print(f" 推荐知识点:{analysis.get('knowledge_points', [])}")
        print(f" 关联素养点:{analysis.get('literacy_points', [])}")
        self.kg.store_analysis(
            question_id=analysis['problem_id'],
            content=self.content,
            knowledge=analysis.get('knowledge_points', []),
            literacy=analysis.get('literacy_points', [])
        )
# ================== 测试用例 ==================
if __name__ == '__main__':
    # One sample per rule in LocalKnowledgeBase.
    SAMPLE_PROBLEMS = (
        "小明用50元买了3本笔记本每本8元还剩多少钱",
        "甲乙两车相距300公里甲车速度60km/h乙车40km/h几小时后相遇",
        "一项工程甲队单独做需要10天乙队需要15天两队合作需要多少天",
        "一个长方形长8cm宽5cm求面积和周长",
        "含盐20%的盐水500克要配成15%的盐水,需加水多少克?",
    )
    separator = "=" * 80
    for problem in SAMPLE_PROBLEMS:
        print("\n" + separator)
        print(f"📚 处理题目:{problem}")
        ProblemAnalyzer(problem).execute()