# -*- coding: utf-8 -*-
"""
Math Problem Analysis System v5.0 (offline-capable edition)

Features:
1. Local rule engine first, with optional LLM enhancement
2. Automatic Neo4j data cleaning
3. Robust error handling
4. Detailed runtime logging
"""
import hashlib
import json
import re
from typing import Dict, List

import jieba
import requests
from openai import OpenAI
from py2neo import Graph

# NEO4J_URI, NEO4J_AUTH, MODEL_API_URL, MODEL_API_KEY and MODEL_NAME
# come from the local Config module; the Config class below only holds
# runtime switches and does not conflict with this star import.
from Config import *

# Initialize the tokenizer up front so the first analysis call is fast
jieba.initialize()
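
# The Config module is assumed to provide values along these lines
# (hypothetical placeholders, not the real deployment settings):
#   NEO4J_URI = "bolt://localhost:7687"
#   NEO4J_AUTH = ("neo4j", "password")
#   MODEL_API_URL = "https://api.example.com/v1"
#   MODEL_API_KEY = "sk-..."
#   MODEL_NAME = "your-model-name"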

# ================== Configuration ==================
class Config:
    LLM_ENABLED = True   # set to True to enable the LLM pass
    LLM_TIMEOUT = 10     # seconds

    # System parameters
    MAX_CONTENT_LENGTH = 500


# Split the exam file into questions
def split_questions(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Match numbered question blocks (answers included): from "N. 【...】"
    # up to the next numbered block or end of file
    pattern = r'(\d+\.\s+【.*?】.*?(?=\n\d+\.|\Z))'
    questions = re.findall(pattern, content, re.DOTALL)

    # Strip surrounding whitespace from every block
    cleaned_questions = [q.strip() for q in questions]

    return cleaned_questions[:10]  # return at most the first 10 questions
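
# A sketch of the input this expects (hypothetical ShiTi.md contents):
#   1. 【四则运算】小明有50元,买了3本笔记本……
#      答案:26元
#   2. 【行程问题】甲乙两车相距300公里……
# split_questions('ShiTi.md') then returns each numbered block, answer
# text included, as one stripped string.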

# ================== Knowledge base module ==================
class LocalKnowledgeBase:
    """Local rule-based knowledge base."""

    # Each rule maps a problem type to its trigger keywords and to the
    # knowledge/literacy points it implies. The keywords and point names
    # stay in Chinese: keywords are matched against jieba tokens of the
    # (Chinese) question text, and the point names must match node names
    # in the Neo4j graph.
    RULES = {
        'arithmetic': {
            'name': '四则运算',
            'keywords': ['买', '卖', '元', '还剩', '单价', '总价'],
            'knowledge': ['四则运算应用(购物问题)'],
            'literacy': ['数感培养']
        },
        'travel': {
            'name': '行程问题',
            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
            'knowledge': ['相遇问题解决方案'],
            'literacy': ['空间观念']
        },
        'work': {
            'name': '工程问题',
            'keywords': ['合作', '效率', '工期', '完成', '单独'],
            'knowledge': ['工程合作效率计算'],
            'literacy': ['模型思想']
        },
        'geometry': {
            'name': '几何问题',
            'keywords': ['面积', '周长', '体积', '平方', '立方'],
            'knowledge': ['几何图形面积计算'],
            'literacy': ['空间观念']
        },
        'ratio': {
            'name': '比例问题',
            'keywords': ['百分比', '浓度', '稀释', '配比'],
            'knowledge': ['浓度问题配比计算'],
            'literacy': ['数据分析']
        }
    }

    @classmethod
    def analyze(cls, content: str) -> dict:
        """Local rule-based analysis."""
        result = {
            'problem_types': [],
            'knowledge_points': [],
            'literacy_points': []
        }

        words = set(jieba.cut(content))
        for ptype, config in cls.RULES.items():
            matches = words & set(config['keywords'])
            # Require at least two keyword hits to limit false positives
            if len(matches) >= 2:
                result['problem_types'].append(ptype)
                result['knowledge_points'].extend(config['knowledge'])
                result['literacy_points'].extend(config['literacy'])

        return result
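
    # A quick sketch of the rule engine on the first test case below:
    #   LocalKnowledgeBase.analyze("小明用50元买了3本笔记本,每本8元,还剩多少钱?")
    # jieba's tokens should include 买 / 元 / 还剩 (three 'arithmetic'
    # keywords), so the expected result is roughly:
    #   {'problem_types': ['arithmetic'],
    #    'knowledge_points': ['四则运算应用(购物问题)'],
    #    'literacy_points': ['数感培养']}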

# ================== LLM module ==================
class LLMClient:
    """Client for an optional LLM analysis service."""

    def __init__(self):
        self.enabled = Config.LLM_ENABLED
        self.base_url = MODEL_API_URL
        self.headers = {
            "Authorization": f"Bearer {MODEL_API_KEY}",
            "Content-Type": "application/json"
        }

    def analyze_problem(self, content: str) -> dict:
        """LLM analysis (optional enhancement)."""
        if not self.enabled:
            return {}
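
        # Note: this client calls the /chat/completions endpoint directly
        # via requests, while KnowledgeGraph below uses the openai SDK
        # against the same MODEL_API_URL; only this path applies
        # Config.LLM_TIMEOUT.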

        try:
            payload = {
                "model": MODEL_NAME,
                "messages": [{
                    "role": "user",
                    "content": f"分析数学题目:{content}"
                }],
                "temperature": 0.3,
                "max_tokens": 300
            }
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=Config.LLM_TIMEOUT
            )
            response.raise_for_status()
            return self._parse_response(response.json())
        except Exception as e:
            print(f"⚠️ LLM analysis failed: {str(e)}")
            return {}

    def _parse_response(self, data: dict) -> dict:
        """Parse the LLM response body."""
        try:
            content = data['choices'][0]['message']['content']
            return json.loads(content)
        except (KeyError, IndexError, TypeError, json.JSONDecodeError):
            return {}
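
    # _parse_response assumes the model replies with a bare JSON object,
    # for example (hypothetical reply):
    #   {"problem_types": ["travel"],
    #    "knowledge_points": ["相遇问题解决方案"],
    #    "literacy_points": ["空间观念"]}
    # Anything else (prose, code fences, malformed JSON) degrades to {}.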

# ================== Question-to-graph linking module ==================
class KnowledgeGraph:
    """Links a single question to graph nodes via LLM-generated Cypher."""

    def __init__(self, content: str):
        self.content = content
        self.question_id = hashlib.md5(content.encode()).hexdigest()[:8]
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)

        # Load both data sources
        self.knowledge_points = self._get_knowledge_points()
        self.literacy_points = self._get_literacy_points()
        print(f"Loaded {len(self.knowledge_points)} knowledge points "
              f"and {len(self.literacy_points)} literacy points")

        self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)

    def _get_knowledge_points(self) -> dict:
        try:
            return {row['n.id']: row['n.name']
                    for row in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}
        except Exception as e:
            print(f"Failed to load knowledge points: {str(e)}")
            return {}

    def _get_literacy_points(self) -> dict:
        try:
            return {row['n.value']: row['n.title']
                    for row in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
        except Exception as e:
            print(f"Failed to load literacy points: {str(e)}")
            return {}

    def _make_prompt(self) -> str:
        # Show the model a small sample of valid IDs plus the totals
        kp_samples = "\n".join([f"• {k}: {v}" for k, v in list(self.knowledge_points.items())[:3]])
        lp_samples = "\n".join([f"• {k}: {v}" for k, v in list(self.literacy_points.items())[:3]])

        return f"""请分析题目考查的知识点和核心素养:

可用知识点(ID:名称):
{kp_samples}
...共{len(self.knowledge_points)}个知识点

可用素养点(ID:名称):
{lp_samples}
...共{len(self.literacy_points)}个素养点

生成要求:
1. 必须使用上述ID
2. 按以下格式生成Cypher代码:

MERGE (q:Question {{id: "{self.question_id}"}})
SET q.content = "题目内容"
WITH q
MATCH (kp:KnowledgePoint {{id: "知识点ID"}})
MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)
WITH q
MATCH (lp:LiteracyNode {{value: "素养点ID"}})
MERGE (q)-[:RELATES_TO_LITERACY]->(lp)"""

    def _clean_cypher(self, code: str) -> str:
        """Keep only statements whose IDs actually exist in the graph."""
        valid_kp_ids = [k.upper() for k in self.knowledge_points.keys()]
        valid_lp_ids = [k.upper() for k in self.literacy_points.keys()]

        cleaned = []
        lines = [line.strip() for line in code.split('\n') if line.strip()]

        for line in lines:
            # Knowledge-point MATCH statements
            if 'MATCH (kp:KnowledgePoint' in line:
                if match := re.search(r'id: ["\'](.*?)["\']', line):
                    kp_id = match.group(1).upper()
                    if kp_id in valid_kp_ids:
                        cleaned.append(line.replace(match.group(1), kp_id))

            # Literacy-point MATCH statements
            elif 'MATCH (lp:LiteracyNode' in line:
                if match := re.search(r'value: ["\'](.*?)["\']', line):
                    lp_id = match.group(1).upper()
                    if lp_id in valid_lp_ids:
                        cleaned.append(line.replace(match.group(1), lp_id))

            # Keep the remaining expected statement types
            elif line.startswith(('MERGE', 'WITH', 'SET')):
                cleaned.append(line)

        return '\n'.join(cleaned)
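
    # A sketch of the filtering, assuming "G001" is a valid KnowledgePoint
    # id and "BAD" is not:
    #   MATCH (kp:KnowledgePoint {id: "g001"})  -> kept, id upcased
    #   MATCH (kp:KnowledgePoint {id: "BAD"})   -> dropped
    #   MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)      -> kept
    # Dropping a MATCH while keeping its MERGE can still leave invalid
    # Cypher, so callers should treat the output as best-effort.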

    def run(self) -> str:
        """Ask the LLM for Cypher and return the sanitized result."""
        try:
            response = self.client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": self._make_prompt()},
                    {"role": "user", "content": f"题目内容:{self.content}"}
                ]
            )
            return self._clean_cypher(response.choices[0].message.content)
        except Exception as e:
            print(f"Analysis failed: {str(e)}")
            return ""

    def query_relations(self):
        """Return the knowledge/literacy nodes linked to this question."""
        cypher = f"""
        MATCH (q:Question {{id: "{self.question_id}"}})
        OPTIONAL MATCH (q)-[:TESTS_KNOWLEDGE]->(kp)
        OPTIONAL MATCH (q)-[:RELATES_TO_LITERACY]->(lp)
        RETURN
            kp.id AS knowledge_id,
            kp.name AS knowledge_name,
            lp.value AS literacy_id,
            lp.title AS literacy_title"""
        return self.graph.run(cypher).data()
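
    # run() returning "" on failure pairs with the walrus check in the
    # __main__ block below: `if cypher := kg.run():` skips both execution
    # and the relation query whenever generation failed or every
    # generated ID was filtered out.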

# ================== Knowledge graph module ==================
class KnowledgeManager:
    """Knowledge graph manager."""

    def __init__(self):
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
        self._clean_data()
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()

    def _clean_data(self):
        """Data cleaning: drop test nodes and malformed knowledge points."""
        # The IS NULL test is guarded by the label check: LiteracyNode
        # keys on `value` rather than `id`, so a bare `n.id IS NULL`
        # would wipe every literacy node.
        self.graph.run("""
            MATCH (n)
            WHERE n.name CONTAINS '测试'
               OR (n:KnowledgePoint AND n.id IS NULL)
            DETACH DELETE n
        """)

    def _load_knowledge(self) -> Dict[str, str]:
        """Load knowledge points."""
        result = self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")
        return {rec['n.id']: rec['n.name'] for rec in result}

    def _load_literacy(self) -> Dict[str, str]:
        """Load literacy points."""
        result = self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")
        return {rec['n.value']: rec['n.title'] for rec in result}
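
    # Both maps are keyed by graph ID and valued by display name, e.g.
    # (hypothetical data) knowledge_map == {'K001': '四则运算应用(购物问题)'}
    # and literacy_map == {'L001': '数感培养'}; store_analysis below does a
    # reverse lookup from name back to ID.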

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
        """Persist one analysis result."""
        try:
            # Create the question node; parameterized so that quotes and
            # other special characters in the content cannot break the query
            self.graph.run(
                "MERGE (q:Question {id: $id}) SET q.content = $content",
                id=question_id, content=content
            )

            # Link knowledge points
            for kp_name in knowledge:
                kp_id = next((k for k, v in self.knowledge_map.items() if v == kp_name), None)
                if kp_id:
                    self.graph.run("""
                        MATCH (q:Question {id: $qid})
                        MERGE (kp:KnowledgePoint {id: $kpid})
                        MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)
                    """, qid=question_id, kpid=kp_id)

            # Link literacy points
            for lit_name in literacy:
                lit_id = next((k for k, v in self.literacy_map.items() if v == lit_name), None)
                if lit_id:
                    self.graph.run("""
                        MATCH (q:Question {id: $qid})
                        MERGE (lp:LiteracyNode {value: $lid})
                        MERGE (q)-[:DEVELOPS_LITERACY]->(lp)
                    """, qid=question_id, lid=lit_id)
        except Exception as e:
            print(f"❌ Storage failed: {str(e)}")
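
    # Usage sketch (IDs and names are the hypothetical data above):
    #   km = KnowledgeManager()
    #   km.store_analysis("1a2b3c4d5e6f", "小明用50元……",
    #                     knowledge=["四则运算应用(购物问题)"],
    #                     literacy=["数感培养"])
    # creates or updates one Question node and MERGEs both relationship
    # types onto the matched KnowledgePoint/LiteracyNode nodes.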

# ================== Core logic module ==================
class ProblemAnalyzer:
    """Question analysis engine."""

    def __init__(self, content: str):
        self.original = content
        self.content = self._preprocess(content)
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
        self.llm = LLMClient()

    def _preprocess(self, text: str) -> str:
        """Text preprocessing: strip punctuation and cap the length."""
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]
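
    # Sketch of the preprocessing on the first test case:
    #   _preprocess("小明用50元买了3本笔记本,每本8元,还剩多少钱?")
    # removes everything outside \w and the CJK range (here the commas
    # and question mark), giving "小明用50元买了3本笔记本每本8元还剩多少钱",
    # truncated to Config.MAX_CONTENT_LENGTH characters.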

    def analyze(self) -> dict:
        """Run the analysis pipeline."""
        # Local rule-based analysis
        local_result = LocalKnowledgeBase.analyze(self.content)

        # Optional LLM enhancement
        llm_result = self.llm.analyze_problem(self.original)

        # Merge the two result sets
        return {
            "problem_id": self.question_id,
            "problem_types": list(set(
                local_result.get('problem_types', []) +
                llm_result.get('problem_types', [])
            ))[:3],
            "knowledge_points": list(set(
                local_result.get('knowledge_points', []) +
                llm_result.get('knowledge_points', [])
            ))[:2],
            "literacy_points": list(set(
                local_result.get('literacy_points', []) +
                llm_result.get('literacy_points', [])
            ))[:2]
        }
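
    # Shape of the merged result (values illustrative):
    #   {'problem_id': '1a2b3c4d5e6f',
    #    'problem_types': ['arithmetic'],
    #    'knowledge_points': ['四则运算应用(购物问题)'],
    #    'literacy_points': ['数感培养']}
    # Since set() is unordered, the [:3]/[:2] caps keep an arbitrary
    # subset whenever the merged lists exceed them.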

    def execute(self):
        """Run the full pipeline for one question."""
        print(f"\n🔍 Analyzing question: {self.original[:50]}...")

        analysis = self.analyze()

        print("\n📊 Analysis report:")
        print(f"  Problem types:    {analysis.get('problem_types', [])}")
        print(f"  Knowledge points: {analysis.get('knowledge_points', [])}")
        print(f"  Literacy points:  {analysis.get('literacy_points', [])}")

        # Persist to the knowledge graph
        self.kg.store_analysis(
            question_id=analysis['problem_id'],
            content=self.content,
            knowledge=analysis.get('knowledge_points', []),
            literacy=analysis.get('literacy_points', [])
        )
        print("✅ Data stored")

# ================== Test cases ==================
if __name__ == '__main__':
    # Flow 1: split the question file and link each block through
    # LLM-generated Cypher
    question_blocks = split_questions('ShiTi.md')

    # Verify the split results
    for i, block in enumerate(question_blocks, 1):
        print(f"Question block {i}:")
        print("-" * 50)
        kg = KnowledgeGraph(block)

        if cypher := kg.run():
            print("Generated Cypher:\n", cypher)
            kg.graph.run(cypher)
            print("Linked relations:")
            for record in kg.query_relations():
                print(f"Knowledge point: {record['knowledge_name']} ({record['knowledge_id']})")
                print(f"Literacy point: {record['literacy_title']} ({record['literacy_id']})")

    # Flow 2: rule engine + optional LLM pipeline on in-memory test cases
    test_cases = [
        "小明用50元买了3本笔记本,每本8元,还剩多少钱?",
        "甲乙两车相距300公里,甲车速度60km/h,乙车40km/h,几小时后相遇?",
        "一项工程甲队单独做需要10天,乙队需要15天,两队合作需要多少天?",
        "一个长方形长8cm,宽5cm,求面积和周长",
        "含盐20%的盐水500克,要配成15%的盐水,需加水多少克?"
    ]

    for question in test_cases:
        print("\n" + "=" * 80)
        analyzer = ProblemAnalyzer(question)
        analyzer.execute()