You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

274 lines
9.6 KiB

5 months ago
# -*- coding: utf-8 -*-
5 months ago
"""
数学题目分析系统 v5.0离线可用版
功能特性
1. 本地规则引擎为主 + 大模型增强可选
2. 自动Neo4j数据清洗
3. 完善的错误处理
4. 详细的运行日志
"""
5 months ago
import hashlib
import json
import re
from typing import Callable, Dict, Iterable, List, Optional

import jieba
import requests
from py2neo import Graph

from Config import *
5 months ago
5 months ago
# Initialize the jieba tokenizer once, at import time.
jieba.initialize()
5 months ago
5 months ago
5 months ago
# ================== 配置区 ==================
class Config:
    """Runtime switches and limits shared by the analysis classes."""

    # Large-model (LLM) settings. The LLM call is optional: set LLM_ENABLED
    # to False to rely on the local rule engine only.
    LLM_ENABLED = True
    # Passed as ``timeout=`` to ``requests.post`` by LLMClient (seconds).
    LLM_TIMEOUT = 8
    # Total number of request attempts LLMClient makes (not extra retries).
    LLM_MAX_RETRIES = 2

    # System parameters.
    # Preprocessed problem text is truncated to this many characters.
    MAX_CONTENT_LENGTH = 500
5 months ago
5 months ago
# ================== 本地知识库 ==================
5 months ago
class LocalKnowledgeBase:
    """Local keyword-rule engine that classifies a problem from its tokens.

    ``RULES`` maps a problem-type key to three lists: the ``keywords`` that
    identify the type, and the ``knowledge`` / ``literacy`` point names that
    are reported when the rule fires.
    """

    # A rule fires when at least this many of its distinct keywords occur
    # among the tokens of the problem text.
    MIN_KEYWORD_HITS = 2

    RULES = {
        'arithmetic': {
            # NOTE(review): the three empty strings look like keywords that
            # were lost before this file reached review; they can never match
            # a token. Restore them from the original source.
            'keywords': ['', '', '', '还剩', '单价', '总价'],
            'knowledge': ['四则运算应用(购物问题)'],
            'literacy': ['数感培养'],
        },
        'travel': {
            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
            'knowledge': ['相遇问题解决方案'],
            'literacy': ['空间观念'],
        },
        'work': {
            'keywords': ['合作', '效率', '工期', '完成', '单独'],
            'knowledge': ['工程合作效率计算'],
            'literacy': ['模型思想'],
        },
        'geometry': {
            'keywords': ['面积', '周长', '体积', '平方', '立方'],
            'knowledge': ['几何图形面积计算'],
            'literacy': ['空间观念'],
        },
        'ratio': {
            'keywords': ['百分比', '浓度', '稀释', '配比'],
            'knowledge': ['浓度问题配比计算'],
            'literacy': ['数据分析'],
        },
    }

    @classmethod
    def analyze(
        cls,
        content: str,
        tokenizer: Optional[Callable[[str], Iterable[str]]] = None,
    ) -> dict:
        """Classify ``content`` with the keyword rules.

        Args:
            content: Problem text to analyse.
            tokenizer: Callable that splits ``content`` into tokens. Defaults
                to ``jieba.cut`` when omitted.

        Returns:
            A dict with the keys ``problem_types``, ``knowledge_points`` and
            ``literacy_points``; each value is a list without duplicates, in
            ``RULES`` order. All three lists are empty when no rule fires.
        """
        if tokenizer is None:
            tokenizer = jieba.cut

        tokens = set(tokenizer(content))
        # An empty token must never count as a hit for a '' keyword.
        tokens.discard('')

        result = {
            'problem_types': [],
            'knowledge_points': [],
            'literacy_points': [],
        }
        for ptype, rule in cls.RULES.items():
            if len(tokens & set(rule['keywords'])) < cls.MIN_KEYWORD_HITS:
                continue
            result['problem_types'].append(ptype)
            # Several rules share a point (e.g. '空间观念'); report it once.
            for target, source in (('knowledge_points', 'knowledge'),
                                   ('literacy_points', 'literacy')):
                for point in rule[source]:
                    if point not in result[target]:
                        result[target].append(point)
        return result
5 months ago
5 months ago
5 months ago
# ================== 大模型客户端 ==================
5 months ago
class LLMClient:
    """HTTP client that asks a chat-completions endpoint to analyse a problem."""

    def __init__(self):
        """Read the enable switch, base URL and API key from configuration."""
        self.enabled = Config.LLM_ENABLED
        self.base_url = MODEL_API_URL
        self.headers = {"Authorization": f"Bearer {MODEL_API_KEY}"}

    def analyze_problem(self, content: str) -> dict:
        """Send ``content`` to the model and return the parsed JSON object.

        The request is attempted up to ``Config.LLM_MAX_RETRIES`` times.

        Args:
            content: Problem text to send to the model.

        Returns:
            The dict extracted from the model reply, or ``{}`` when the
            client is disabled, every attempt fails, or the reply contains
            no parsable JSON object.
        """
        if not self.enabled:
            return {}

        # Tolerate a configured base URL that ends with '/'.
        url = f"{str(self.base_url).rstrip('/')}/chat/completions"
        # NOTE(review): the prompt does not ask the model for JSON, while
        # _parse_response only accepts a JSON object in the reply — confirm
        # the deployed model is instructed elsewhere.
        payload = {
            "model": MODEL_NAME,
            "messages": [{
                "role": "user",
                "content": f"分析数学题目:{content}"
            }],
            "temperature": 0.3
        }

        for attempt in range(Config.LLM_MAX_RETRIES):
            try:
                response = requests.post(
                    url,
                    headers=self.headers,
                    json=payload,
                    timeout=Config.LLM_TIMEOUT
                )
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                print(f"🌐 网络错误(尝试 {attempt + 1}/{Config.LLM_MAX_RETRIES}): {str(e)}")
                continue

            try:
                body = response.json()
            except ValueError as e:
                # A non-JSON body raises a ValueError subclass; older
                # ``requests`` releases do not wrap it in RequestException.
                print(f"🔴 解析失败: {str(e)}")
                continue

            return self._parse_response(body)
        return {}

    def _parse_response(self, response: dict) -> dict:
        """Extract the first-``{`` to last-``}`` JSON object from the reply.

        Args:
            response: Decoded response body; the reply text is read from
                ``response['choices'][0]['message']['content']``.

        Returns:
            The decoded JSON object, or ``{}`` when the expected fields are
            missing, no ``{...}`` span is present, or the span is not valid
            JSON.
        """
        try:
            content = response['choices'][0]['message']['content']
            match = re.search(r'\{.*\}', content, re.DOTALL)
            if match is None:
                raise ValueError("响应中未找到 JSON 对象")
            parsed = json.loads(match.group())
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # json.JSONDecodeError is a ValueError; a non-string reply makes
            # re.search raise TypeError.
            print(f"🔴 解析失败: {str(e)}")
            return {}
        return parsed if isinstance(parsed, dict) else {}
5 months ago
5 months ago
5 months ago
# ================== 知识图谱管理 ==================
5 months ago
class KnowledgeManager:
    """Neo4j access layer for knowledge points, literacy points and questions."""

    def __init__(self):
        """Connect to Neo4j, clean the graph, then load both lookup maps."""
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
        self._clean_data()
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()

    def _clean_data(self):
        """Delete test nodes and id-less KnowledgePoint/Question nodes.

        The ``id IS NULL`` criterion is limited to the labels this class keys
        by ``id``. LiteracyNode is keyed by ``value`` (see ``_load_literacy``),
        so an unrestricted check would remove those nodes before they are
        loaded.
        """
        self.graph.run("""
            MATCH (n)
            WHERE n.name CONTAINS '测试'
               OR ((n:KnowledgePoint OR n:Question) AND n.id IS NULL)
            DETACH DELETE n
        """)

    def _load_knowledge(self) -> Dict[str, str]:
        """Return ``{id: name}`` for every KnowledgePoint node."""
        return {rec['n.id']: rec['n.name']
                for rec in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}

    def _load_literacy(self) -> Dict[str, str]:
        """Return ``{value: title}`` for every LiteracyNode node."""
        return {rec['n.value']: rec['n.title']
                for rec in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}

    @staticmethod
    def _key_for_name(mapping, name):
        """Return the first key of ``mapping`` whose value equals ``name``, else None."""
        return next((key for key, value in mapping.items() if value == name), None)

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
        """Store a question and link it to known knowledge/literacy points.

        All statements run in one transaction. Values are passed as Cypher
        parameters, so quotes or backslashes in ``content`` cannot alter the
        query and ids keep their stored type. Names that are not present in
        ``knowledge_map`` / ``literacy_map`` are skipped.

        Args:
            question_id: Value of the Question node's ``id`` property.
            content: Text written to the Question node's ``content`` property.
            knowledge: Knowledge point names to link via REQUIRES_KNOWLEDGE.
            literacy: Literacy point titles to link via DEVELOPS_LITERACY.

        Any exception raised while writing is caught: the transaction is
        rolled back and the failure is printed instead of propagated.
        """
        tx = self.graph.begin()
        try:
            # Create or update the question node.
            tx.run(
                """
                MERGE (q:Question {id: $question_id})
                SET q.content = $content
                """,
                {"question_id": question_id, "content": content},
            )

            # Link knowledge points.
            for kp_name in knowledge:
                kp_id = self._key_for_name(self.knowledge_map, kp_name)
                if kp_id is None:  # explicit check so a falsy id (0, '') still links
                    continue
                tx.run(
                    """
                    MERGE (kp:KnowledgePoint {id: $kp_id})
                    WITH kp
                    MATCH (q:Question {id: $question_id})
                    MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)
                    """,
                    {"question_id": question_id, "kp_id": kp_id},
                )

            # Link literacy points.
            for lit_name in literacy:
                lit_id = self._key_for_name(self.literacy_map, lit_name)
                if lit_id is None:
                    continue
                tx.run(
                    """
                    MERGE (lp:LiteracyNode {value: $lit_id})
                    WITH lp
                    MATCH (q:Question {id: $question_id})
                    MERGE (q)-[:DEVELOPS_LITERACY]->(lp)
                    """,
                    {"question_id": question_id, "lit_id": lit_id},
                )

            self.graph.commit(tx)
            print("✅ 数据存储成功")
        except Exception as e:
            self.graph.rollback(tx)
            print(f"❌ 存储失败: {str(e)}")
5 months ago
# ================== 核心分析引擎 ==================
5 months ago
class ProblemAnalyzer:
    """Analyse one problem with local rules plus the LLM, then store the result."""

    def __init__(self, content: str, kg=None, llm=None):
        """Prepare the analysis of ``content``.

        Args:
            content: Raw problem text.
            kg: Optional object providing ``store_analysis``; a new
                ``KnowledgeManager`` is created when omitted. Passing one
                lets callers reuse a single graph connection.
            llm: Optional object providing ``analyze_problem``; a new
                ``LLMClient`` is created when omitted.
        """
        self.original = content
        self.content = self._preprocess(content)
        # Short identifier: first 12 hex digits of the SHA-256 of the raw text.
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = kg if kg is not None else KnowledgeManager()
        self.llm = llm if llm is not None else LLMClient()

    def _preprocess(self, text: str) -> str:
        """Drop every non-word character and truncate to the configured length.

        NOTE(review): this also removes '.', '%' and '/', so "1.5" becomes
        "15" and "20%" becomes "20" in the text that is analysed and stored —
        confirm that this is intended.
        """
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]

    @staticmethod
    def _merge_unique(primary, secondary, limit: int) -> List[str]:
        """Merge two string lists without duplicates and keep the first ``limit``.

        Order is preserved (``primary`` first), so the truncation always
        keeps the same items. Arguments that are not lists/tuples, and items
        that are not strings, are ignored.
        """
        merged = []
        for source in (primary, secondary):
            if not isinstance(source, (list, tuple)):
                continue
            for item in source:
                if isinstance(item, str) and item not in merged:
                    merged.append(item)
        return merged[:limit]

    def analyze(self) -> dict:
        """Run both analysers and fuse their results.

        Returns:
            A dict with ``problem_id`` plus at most 3 ``problem_types``,
            2 ``knowledge_points`` and 2 ``literacy_points``; local-rule
            results take precedence over LLM results.
        """
        # Local rule analysis works on the preprocessed text.
        local_result = LocalKnowledgeBase.analyze(self.content)
        # The LLM receives the raw text.
        llm_result = self.llm.analyze_problem(self.original)
        if not isinstance(llm_result, dict):
            llm_result = {}

        # Fuse the two results, key by key.
        report = {"problem_id": self.question_id}
        for key, limit in (("problem_types", 3),
                           ("knowledge_points", 2),
                           ("literacy_points", 2)):
            report[key] = self._merge_unique(
                local_result.get(key, []), llm_result.get(key, []), limit
            )
        return report

    def execute(self):
        """Analyse the problem, print the report and store it in the graph."""
        print(f"\n🔍 开始分析题目:{self.original[:50]}...")
        analysis = self.analyze()
        print("\n📊 分析报告:")
        print(f" 题型识别:{analysis.get('problem_types', [])}")
        print(f" 推荐知识点:{analysis.get('knowledge_points', [])}")
        print(f" 关联素养点:{analysis.get('literacy_points', [])}")
        # Persist to the knowledge graph.
        self.kg.store_analysis(
            question_id=analysis['problem_id'],
            content=self.content,
            knowledge=analysis.get('knowledge_points', []),
            literacy=analysis.get('literacy_points', [])
        )
# ================== 测试用例 ==================
5 months ago
def main():
    """Run the built-in sample problems through the full pipeline.

    Each problem is handled independently: a failure is printed and the
    remaining problems are still processed.
    """
    test_cases = [
        "小明用50元买了3本笔记本每本8元还剩多少钱",
        "甲乙两车相距300公里甲车速度60km/h乙车40km/h几小时后相遇",
        "一项工程甲队单独做需要10天乙队需要15天两队合作需要多少天",
        "一个长方形长8cm宽5cm求面积和周长",
        "含盐20%的盐水500克要配成15%的盐水,需加水多少克?"
    ]
    for question in test_cases:
        print("\n" + "=" * 80)
        print(f"📚 处理题目:{question}")
        try:
            analyzer = ProblemAnalyzer(question)
            analyzer.execute()
        except Exception as e:
            # Top-level boundary: report and continue with the next problem.
            print(f"❌ 分析失败: {str(e)}")


if __name__ == '__main__':
    main()