# NOTE: the lines in this header are web-viewer page residue captured with the file; they are not part of the program.
# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
#
# 278 lines
# 9.5 KiB
#
# This file contains ambiguous Unicode characters!
#
# This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
"""
数学题目分析系统 v5.0(离线可用版)
功能特性:
1. 本地规则引擎为主 + 大模型增强(可选)
2. 自动Neo4j数据清洗
3. 完善的错误处理
4. 详细的运行日志
"""
import hashlib
import json
import os
import re
from typing import Dict, List

import jieba
import requests
from py2neo import Graph

from Config import *
# Initialise the jieba tokenizer once, at import time.
# NOTE(review): presumably this loads jieba's dictionary eagerly rather than
# on the first cut() call - confirm against the jieba documentation.
jieba.initialize()
# ================== 配置区 ==================
class Config:
    """Runtime settings shared by the analysis pipeline.

    All values are plain class attributes and are read as ``Config.NAME``.
    """

    # Set to True to call the remote large language model in addition to the
    # local rule engine.
    LLM_ENABLED = True
    # Timeout (seconds) passed to ``requests.post`` for the LLM request.
    LLM_TIMEOUT = 10

    # Neo4j connection settings. KnowledgeManager reads Config.NEO4J_URI and
    # Config.NEO4J_AUTH, but this class did not define them, so constructing
    # a KnowledgeManager raised AttributeError. They are taken from the
    # environment so credentials stay out of the source tree.
    # NOTE(review): the fallback values are assumptions for a local default
    # Neo4j install - confirm them for your deployment.
    NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
    NEO4J_AUTH = (
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "neo4j"),
    )

    # System parameters.
    # Maximum number of characters kept by ProblemAnalyzer._preprocess.
    MAX_CONTENT_LENGTH = 500
# ================== 知识库模块 ==================
class LocalKnowledgeBase:
    """Local, rule-based classifier for maths word problems.

    Each entry in ``RULES`` describes one problem type: a display name, the
    keywords that signal it, and the knowledge / literacy points to recommend
    when the type is recognised.
    """

    # A problem type is reported only when at least this many of its distinct
    # keywords occur in the text.
    MIN_KEYWORD_HITS = 2

    # NOTE(review): the first three 'arithmetic' keywords are empty strings,
    # which looks like lost data. analyze() ignores empty keywords; restore
    # the intended words if they are known.
    RULES = {
        'arithmetic': {
            'name': '四则运算',
            'keywords': ['', '', '', '还剩', '单价', '总价'],
            'knowledge': ['四则运算应用(购物问题)'],
            'literacy': ['数感培养']
        },
        'travel': {
            'name': '行程问题',
            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
            'knowledge': ['相遇问题解决方案'],
            'literacy': ['空间观念']
        },
        'work': {
            'name': '工程问题',
            'keywords': ['合作', '效率', '工期', '完成', '单独'],
            'knowledge': ['工程合作效率计算'],
            'literacy': ['模型思想']
        },
        'geometry': {
            'name': '几何问题',
            'keywords': ['面积', '周长', '体积', '平方', '立方'],
            'knowledge': ['几何图形面积计算'],
            'literacy': ['空间观念']
        },
        'ratio': {
            'name': '比例问题',
            'keywords': ['百分比', '浓度', '稀释', '配比'],
            'knowledge': ['浓度问题配比计算'],
            'literacy': ['数据分析']
        }
    }

    @classmethod
    def analyze(cls, content: str) -> dict:
        """Classify ``content`` using the keyword rules.

        Keywords are matched as substrings of ``content`` rather than against
        tokenizer output. Exact-token matching missed keywords that the
        segmenter folds into a longer word (e.g. '平方' inside '平方米').
        Every match the token-based version produced is still a match here,
        because a token is always a substring of the text it came from.

        Args:
            content: Problem text; an empty or None value yields an empty
                result.

        Returns:
            A dict with the keys ``problem_types``, ``knowledge_points`` and
            ``literacy_points``, each a list of strings without duplicates,
            in rule-definition order.
        """
        result = {
            'problem_types': [],
            'knowledge_points': [],
            'literacy_points': []
        }
        if not content:
            return result

        for ptype, rule in cls.RULES.items():
            # Skip empty keywords: '' is a substring of every string and
            # would otherwise count as a hit for any input.
            hits = {kw for kw in rule['keywords'] if kw and kw in content}
            if len(hits) < cls.MIN_KEYWORD_HITS:
                continue
            result['problem_types'].append(ptype)
            # Several rules share the same literacy point; keep each
            # recommendation once.
            for target, source in (('knowledge_points', 'knowledge'),
                                   ('literacy_points', 'literacy')):
                for point in rule[source]:
                    if point not in result[target]:
                        result[target].append(point)
        return result
# ================== 大模型模块 ==================
class LLMClient:
    """Optional client for a remote chat-completions HTTP endpoint.

    The client is best-effort: every failure path returns an empty dict so
    that the local rule engine can still produce a result on its own.
    """

    def __init__(self):
        """Read the enable flag and build the endpoint URL and headers."""
        self.enabled = Config.LLM_ENABLED
        self.base_url = MODEL_API_URL
        self.headers = {
            "Authorization": f"Bearer {MODEL_API_KEY}",
            "Content-Type": "application/json"
        }

    def analyze_problem(self, content: str) -> dict:
        """Ask the model to analyse ``content``.

        Args:
            content: The problem text sent in the user message.

        Returns:
            The dict decoded from the model's reply, or ``{}`` when the
            client is disabled, the request fails, or the reply is not a
            JSON object.
        """
        if not self.enabled:
            return {}
        try:
            # NOTE(review): the prompt does not ask the model for JSON, yet
            # _parse_response only accepts a JSON object - confirm the model
            # is configured to reply in that format.
            payload = {
                "model": MODEL_NAME,
                "messages": [{
                    "role": "user",
                    "content": f"分析数学题目:{content}"
                }],
                "temperature": 0.3,
                "max_tokens": 300
            }
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=Config.LLM_TIMEOUT
            )
            response.raise_for_status()
            return self._parse_response(response.json())
        except Exception as e:
            # Deliberate catch-all: the LLM is an optional enhancement, so
            # any failure is reported and the caller falls back to the
            # local rules.
            print(f"⚠️ 大模型分析失败: {str(e)}")
            return {}

    def _parse_response(self, data: dict) -> dict:
        """Extract the JSON object from a chat-completions response body.

        Args:
            data: Decoded response body; the reply text is read from
                ``data['choices'][0]['message']['content']``.

        Returns:
            The decoded dict, or ``{}`` when the expected fields are missing,
            the reply is not valid JSON, or the JSON value is not an object.
        """
        try:
            text = data['choices'][0]['message']['content'].strip()
            if text.startswith("```"):
                # Replies are sometimes wrapped in a Markdown code fence
                # (``` or ```json); drop the opening fence line and the
                # closing fence before decoding.
                text = text.split("\n", 1)[1] if "\n" in text else ""
                text = text.rsplit("```", 1)[0]
            parsed = json.loads(text)
        except (KeyError, IndexError, TypeError, AttributeError, ValueError):
            # ValueError covers json.JSONDecodeError; the others cover a
            # body that lacks the expected structure.
            return {}
        # Callers use dict.get on the result, so reject JSON lists/scalars.
        return parsed if isinstance(parsed, dict) else {}
# ================== 知识图谱模块 ==================
class KnowledgeManager:
    """Reads from and writes to the Neo4j knowledge graph.

    On construction it connects to the graph, runs a clean-up query and
    caches the knowledge-point and literacy-node lookups used when storing
    an analysis.
    """

    def __init__(self):
        """Connect using Config.NEO4J_URI / Config.NEO4J_AUTH and load caches."""
        self.graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
        self._clean_data()
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()

    def _clean_data(self):
        """Delete test nodes and id-less KnowledgePoint/Question nodes.

        The missing-id check is limited to the labels that are keyed by
        ``id`` in this file. Applied to every node it also removed
        LiteracyNode entries, which are keyed by ``value`` and carry no
        ``id`` in the queries here.
        """
        self.graph.run("""
            MATCH (n)
            WHERE n.name CONTAINS '测试'
               OR ((n:KnowledgePoint OR n:Question) AND n.id IS NULL)
            DETACH DELETE n
        """)

    def _load_knowledge(self) -> Dict[str, str]:
        """Return a mapping of KnowledgePoint ``id`` to ``name``."""
        result = self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")
        return {rec['n.id']: rec['n.name'] for rec in result}

    def _load_literacy(self) -> Dict[str, str]:
        """Return a mapping of LiteracyNode ``value`` to ``title``."""
        result = self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")
        return {rec['n.value']: rec['n.title'] for rec in result}

    @staticmethod
    def _find_key(mapping, label):
        """Return the first key in ``mapping`` whose value equals ``label``, else None."""
        return next((key for key, value in mapping.items() if value == label), None)

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
        """Store a question and its links in a single transaction.

        All values are passed as Cypher parameters instead of being
        formatted into the query text, so quotes or backslashes in the
        input can neither break nor alter the query.

        Args:
            question_id: Identifier of the Question node to create or update.
            content: Problem text stored on the Question node.
            knowledge: Knowledge-point names; names missing from
                ``knowledge_map`` are skipped.
            literacy: Literacy-point titles; titles missing from
                ``literacy_map`` are skipped.

        Returns:
            True when the transaction was committed, False when it was
            rolled back. Errors are reported on stdout, not raised.
        """
        tx = self.graph.begin()
        try:
            # Create or update the question node.
            tx.run(
                """
                MERGE (q:Question {id: $qid})
                SET q.content = $content
                """,
                {"qid": question_id, "content": content},
            )
            # Link knowledge points.
            for kp_name in knowledge or []:
                kp_id = self._find_key(self.knowledge_map, kp_name)
                # Compare with None so a falsy but valid id is still linked.
                if kp_id is not None:
                    tx.run(
                        """
                        MERGE (kp:KnowledgePoint {id: $kp_id})
                        WITH kp
                        MATCH (q:Question {id: $qid})
                        MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)
                        """,
                        {"qid": question_id, "kp_id": kp_id},
                    )
            # Link literacy points.
            for lit_name in literacy or []:
                lit_id = self._find_key(self.literacy_map, lit_name)
                if lit_id is not None:
                    tx.run(
                        """
                        MERGE (lp:LiteracyNode {value: $lit_id})
                        WITH lp
                        MATCH (q:Question {id: $qid})
                        MERGE (q)-[:DEVELOPS_LITERACY]->(lp)
                        """,
                        {"qid": question_id, "lit_id": lit_id},
                    )
            tx.commit()
            print("✅ 数据存储成功")
            return True
        except Exception as e:
            # Storage is best-effort: undo partial writes and report.
            tx.rollback()
            print(f"❌ 存储失败: {str(e)}")
            return False
# ================== 核心逻辑模块 ==================
class ProblemAnalyzer:
    """Analysis engine for one maths problem.

    Combines the local rule engine with the optional LLM result and stores
    the outcome in the knowledge graph.
    """

    # Maximum number of entries reported per result field.
    MAX_PROBLEM_TYPES = 3
    MAX_KNOWLEDGE_POINTS = 2
    MAX_LITERACY_POINTS = 2

    def __init__(self, content: str, kg=None, llm=None):
        """Prepare the analyzer for ``content``.

        Args:
            content: Raw problem text.
            kg: Optional graph store to reuse; a new KnowledgeManager is
                created when omitted.
            llm: Optional LLM client to reuse; a new LLMClient is created
                when omitted.
        """
        self.original = content
        self.content = self._preprocess(content)
        # The id is derived from the raw text, not the preprocessed text.
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = kg if kg is not None else KnowledgeManager()
        self.llm = llm if llm is not None else LLMClient()

    def _preprocess(self, text: str) -> str:
        """Strip non-word characters and truncate to Config.MAX_CONTENT_LENGTH.

        NOTE(review): this also removes '.' and '%', so '8.5' becomes '85'
        and '20%' becomes '20' in the stored text - confirm that is intended.
        """
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]

    @staticmethod
    def _merge_unique(primary, secondary, limit):
        """Merge two lists of strings without duplicates and cap the length.

        Items keep their original order with ``primary`` first, so the
        truncation is deterministic and prefers the first source. Sources
        that are not lists/tuples, and items that are not strings, are
        ignored so a malformed payload cannot raise.
        """
        merged = []
        for source in (primary, secondary):
            if not isinstance(source, (list, tuple)):
                continue
            for item in source:
                if isinstance(item, str) and item not in merged:
                    merged.append(item)
        return merged[:limit]

    def analyze(self) -> dict:
        """Run the local rules and the optional LLM, then merge the results.

        Returns:
            A dict with ``problem_id`` plus the lists ``problem_types``,
            ``knowledge_points`` and ``literacy_points``. Local results come
            first in each list, followed by LLM additions, capped at the
            class-level maxima.
        """
        # Local rule analysis runs on the preprocessed text.
        local_result = LocalKnowledgeBase.analyze(self.content)
        # The LLM receives the original, unprocessed text.
        llm_result = self.llm.analyze_problem(self.original)
        if not isinstance(llm_result, dict):
            llm_result = {}

        # Merge in a fixed order: list(set(...))[:k] truncated in hash
        # order, which varies between runs and could drop local results.
        return {
            "problem_id": self.question_id,
            "problem_types": self._merge_unique(
                local_result.get('problem_types'),
                llm_result.get('problem_types'),
                self.MAX_PROBLEM_TYPES,
            ),
            "knowledge_points": self._merge_unique(
                local_result.get('knowledge_points'),
                llm_result.get('knowledge_points'),
                self.MAX_KNOWLEDGE_POINTS,
            ),
            "literacy_points": self._merge_unique(
                local_result.get('literacy_points'),
                llm_result.get('literacy_points'),
                self.MAX_LITERACY_POINTS,
            ),
        }

    def execute(self):
        """Analyse the problem, print a report and store the result."""
        print(f"\n🔍 开始分析题目:{self.original[:50]}...")
        analysis = self.analyze()
        print("\n📊 分析报告:")
        print(f" 题型识别:{analysis.get('problem_types', [])}")
        print(f" 推荐知识点:{analysis.get('knowledge_points', [])}")
        print(f" 关联素养点:{analysis.get('literacy_points', [])}")
        # Store in the knowledge graph.
        stored = self.kg.store_analysis(
            question_id=analysis['problem_id'],
            content=self.content,
            knowledge=analysis.get('knowledge_points', []),
            literacy=analysis.get('literacy_points', [])
        )
        # Only an explicit False counts as failure, so a store that returns
        # nothing still gets the completion message.
        if stored is not False:
            print("✅ 数据存储完成")
# ================== 测试用例 ==================
if __name__ == '__main__':
    # Sample problems, one per rule family: shopping arithmetic, travel,
    # joint work, geometry and concentration.
    sample_questions = (
        "小明用50元买了3本笔记本每本8元还剩多少钱",
        "甲乙两车相距300公里甲车速度60km/h乙车40km/h几小时后相遇",
        "一项工程甲队单独做需要10天乙队需要15天两队合作需要多少天",
        "一个长方形长8cm宽5cm求面积和周长",
        "含盐20%的盐水500克要配成15%的盐水,需加水多少克?",
    )
    # Separator line printed before each problem's report.
    banner = "\n" + "=" * 80
    for text in sample_questions:
        print(banner)
        ProblemAnalyzer(text).execute()