main
黄海 5 months ago
parent e0c4779627
commit 4b15e9d24d

@@ -1,149 +1,272 @@
# -*- coding: utf-8 -*-
"""
Math Problem Analysis System v5.0 (offline-capable)
Features:
1. Local rule engine first, with optional LLM enhancement
2. Automatic Neo4j data cleaning
3. Robust error handling
4. Detailed run logging
"""
import hashlib
import json
import re
from typing import Dict, List

import jieba
import requests
from py2neo import Graph
from openai import OpenAI

from Config import *

# Initialize the tokenizer up front so the first analysis is not slowed down
jieba.initialize()
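
# `from Config import *` above is expected to provide the connection settings
# used throughout this file. A minimal Config.py sketch (the names are taken
# from this file; the values are placeholders, not the project's real settings):
#
#   NEO4J_URI = "bolt://localhost:7687"
#   NEO4J_AUTH = ("neo4j", "password")
#   MODEL_API_URL = "https://api.example.com/v1"
#   MODEL_API_KEY = "sk-..."
#   MODEL_NAME = "your-model-name"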

# ================== Configuration ==================
class Config:
    LLM_ENABLED = True  # set to True to enable the LLM
    LLM_TIMEOUT = 10
    # System parameters
    MAX_CONTENT_LENGTH = 500


# Split the exam file into question blocks
def split_questions(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()
    # Match whole question blocks (answers included): a numbered item followed
    # by a 【...】 marker, running up to the next numbered item or end of file
    pattern = r'(\d+\.\s+【.*?】.*?(?=\n\d+\.|\Z))'
    questions = re.findall(pattern, content, re.DOTALL)
    # Strip surrounding whitespace from each block
    cleaned_questions = [q.strip() for q in questions]
    return cleaned_questions[:10]  # return at most the first 10 questions
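
# A quick sanity check for the pattern above (hypothetical input, not taken
# from ShiTi.md, whose real formatting may differ):
#
#   >>> text = "1. 【易】计算 2+3。\n答案:5\n2. 【中】求 4×5。"
#   >>> re.findall(r'(\d+\.\s+【.*?】.*?(?=\n\d+\.|\Z))', text, re.DOTALL)
#   ['1. 【易】计算 2+3。\n答案:5', '2. 【中】求 4×5。']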

# ================== Knowledge base module ==================
class LocalKnowledgeBase:
    """Local rule-based knowledge base."""
    RULES = {
        'arithmetic': {
            'name': '四则运算',
            'keywords': ['还剩', '单价', '总价'],
            'knowledge': ['四则运算应用(购物问题)'],
            'literacy': ['数感培养']
        },
        'travel': {
            'name': '行程问题',
            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
            'knowledge': ['相遇问题解决方案'],
            'literacy': ['空间观念']
        },
        'work': {
            'name': '工程问题',
            'keywords': ['合作', '效率', '工期', '完成', '单独'],
            'knowledge': ['工程合作效率计算'],
            'literacy': ['模型思想']
        },
        'geometry': {
            'name': '几何问题',
            'keywords': ['面积', '周长', '体积', '平方', '立方'],
            'knowledge': ['几何图形面积计算'],
            'literacy': ['空间观念']
        },
        'ratio': {
            'name': '比例问题',
            'keywords': ['百分比', '浓度', '稀释', '配比'],
            'knowledge': ['浓度问题配比计算'],
            'literacy': ['数据分析']
        }
    }
    @classmethod
    def analyze(cls, content: str) -> dict:
        """Local rule-based analysis: a problem type matches when at least
        two of its keywords appear in the segmented text."""
        result = {
            'problem_types': [],
            'knowledge_points': [],
            'literacy_points': []
        }
        words = set(jieba.cut(content))
        for ptype, config in cls.RULES.items():
            matches = words & set(config['keywords'])
            if len(matches) >= 2:
                result['problem_types'].append(ptype)
                result['knowledge_points'].extend(config['knowledge'])
                result['literacy_points'].extend(config['literacy'])
        return result
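
# Example of the rule matching (hypothetical tokens; jieba's actual
# segmentation of a real question may differ):
#
#   words = {'相遇', '速度', '出发', '公里'}
#   words & set(RULES['travel']['keywords'])  ->  {'相遇', '速度', '出发'}
#   len(matches) >= 2, so 'travel' is reported with its knowledge/literacy points.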

# ================== LLM module ==================
class LLMClient:
    """LLM service client (optional)."""

    def __init__(self):
        self.enabled = Config.LLM_ENABLED
        self.base_url = MODEL_API_URL
        self.headers = {
            "Authorization": f"Bearer {MODEL_API_KEY}",
            "Content-Type": "application/json"
        }

    def analyze_problem(self, content: str) -> dict:
        """LLM analysis (optional enhancement)."""
        if not self.enabled:
            return {}
        try:
            payload = {
                "model": MODEL_NAME,
                "messages": [{
                    "role": "user",
                    "content": f"Analyze this math problem: {content}"
                }],
                "temperature": 0.3,
                "max_tokens": 300
            }
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=Config.LLM_TIMEOUT
            )
            response.raise_for_status()
            return self._parse_response(response.json())
        except Exception as e:
            print(f"⚠️ LLM analysis failed: {str(e)}")
            return {}

    def _parse_response(self, data: dict) -> dict:
        """Parse the LLM response; return an empty dict on malformed output."""
        try:
            content = data['choices'][0]['message']['content']
            return json.loads(content)
        except (KeyError, IndexError, json.JSONDecodeError):
            return {}

class KnowledgeGraph:
    """LLM-driven graph builder: asks the model for Cypher restricted to known
    IDs, sanitizes the result, and queries the relations it created."""

    def __init__(self, content: str):
        self.content = content
        self.question_id = hashlib.md5(content.encode()).hexdigest()[:8]
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
        # Load both data sources
        self.knowledge_points = self._get_knowledge_points()
        self.literacy_points = self._get_literacy_points()
        print(f"Loaded {len(self.knowledge_points)} knowledge points, {len(self.literacy_points)} literacy points")
        self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)

    def _get_knowledge_points(self) -> dict:
        try:
            return {row['n.id']: row['n.name']
                    for row in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}
        except Exception as e:
            print(f"Failed to load knowledge points: {str(e)}")
            return {}

    def _get_literacy_points(self) -> dict:
        try:
            return {row['n.value']: row['n.title']
                    for row in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
        except Exception as e:
            print(f"Failed to load literacy points: {str(e)}")
            return {}

    def _make_prompt(self) -> str:
        kp_samples = "\n".join([f"{k}: {v}" for k, v in list(self.knowledge_points.items())[:3]])
        lp_samples = "\n".join([f"{k}: {v}" for k, v in list(self.literacy_points.items())[:3]])
        return f"""Identify the knowledge points and core literacies the question tests.
Available knowledge points (ID: name):
{kp_samples}
...{len(self.knowledge_points)} knowledge points in total
Available literacy points (ID: name):
{lp_samples}
...{len(self.literacy_points)} literacy points in total
Requirements:
1. Use only the IDs listed above
2. Emit Cypher exactly in this form:
MERGE (q:Question {{id: "{self.question_id}"}})
SET q.content = "question text"
WITH q
MATCH (kp:KnowledgePoint {{id: "knowledge point ID"}})
MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)
WITH q
MATCH (lp:LiteracyNode {{value: "literacy point ID"}})
MERGE (q)-[:RELATES_TO_LITERACY]->(lp)"""

    def _clean_cypher(self, code: str) -> str:
        """Whitelist-filter the generated Cypher line by line."""
        valid_kp_ids = [k.upper() for k in self.knowledge_points.keys()]
        valid_lp_ids = [k.upper() for k in self.literacy_points.keys()]
        cleaned = []
        lines = [line.strip() for line in code.split('\n') if line.strip()]
        for line in lines:
            # Knowledge-point MATCH: keep only known IDs (normalized to upper case)
            if 'MATCH (kp:KnowledgePoint' in line:
                if match := re.search(r'id: ["\'](.*?)["\']', line):
                    kp_id = match.group(1).upper()
                    if kp_id in valid_kp_ids:
                        cleaned.append(line.replace(match.group(1), kp_id))
            # Literacy-point MATCH: same check on the value property
            elif 'MATCH (lp:LiteracyNode' in line:
                if match := re.search(r'value: ["\'](.*?)["\']', line):
                    lp_id = match.group(1).upper()
                    if lp_id in valid_lp_ids:
                        cleaned.append(line.replace(match.group(1), lp_id))
            # Keep other legitimate statements
            elif line.startswith(('MERGE', 'WITH', 'SET')):
                cleaned.append(line)
        return '\n'.join(cleaned)
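
    # Sketch of what _clean_cypher keeps (IDs invented for illustration):
    # given model output
    #     MERGE (q:Question {id: "ab12cd34"})
    #     MATCH (kp:KnowledgePoint {id: "kp001"})
    #     MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)
    # the MATCH line survives only if "KP001" is a known knowledge-point ID
    # (the ID is upper-cased in place); unknown IDs and any statement not
    # starting with MERGE/WITH/SET are dropped.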
    def run(self) -> str:
        """Ask the LLM for Cypher and return the sanitized statement ('' on failure)."""
        try:
            response = self.client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": self._make_prompt()},
                    {"role": "user", "content": f"Question: {self.content}"}
                ]
            )
            return self._clean_cypher(response.choices[0].message.content)
        except Exception as e:
            print(f"Analysis failed: {str(e)}")
            return ""

    def query_relations(self):
        """Read back the relations created for this question."""
        cypher = f"""
        MATCH (q:Question {{id: "{self.question_id}"}})
        OPTIONAL MATCH (q)-[:TESTS_KNOWLEDGE]->(kp)
        OPTIONAL MATCH (q)-[:RELATES_TO_LITERACY]->(lp)
        RETURN
            kp.id AS knowledge_id,
            kp.name AS knowledge_name,
            lp.value AS literacy_id,
            lp.title AS literacy_title"""
        return self.graph.run(cypher).data()
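
# query_relations() returns py2neo .data() rows keyed by the RETURN aliases
# above; the values here are illustrative, not real graph content:
#
#   [{'knowledge_id': 'KP001', 'knowledge_name': '四则运算应用(购物问题)',
#     'literacy_id': 'L01', 'literacy_title': '数感培养'}]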

# ================== Knowledge graph module ==================
class KnowledgeManager:
    """Knowledge graph manager."""

    def __init__(self):
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
        self._clean_data()
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()

    def _clean_data(self):
        """Data cleaning: drop test nodes (name contains '测试') and nodes without an id."""
        self.graph.run("""
            MATCH (n)
            WHERE n.name CONTAINS '测试' OR n.id IS NULL
            DETACH DELETE n
        """)

    def _load_knowledge(self) -> Dict[str, str]:
        """Load knowledge points."""
        result = self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")
        return {rec['n.id']: rec['n.name'] for rec in result}

    def _load_literacy(self) -> Dict[str, str]:
        """Load literacy points."""
        result = self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")
        return {rec['n.value']: rec['n.title'] for rec in result}

    def store_analysis(self, question_id: str, content: str,
                       knowledge: List[str], literacy: List[str]):
        """Persist an analysis result."""
        try:
            # Create the question node
            self.graph.run(
                f"MERGE (q:Question {{id: '{question_id}', content: '{content}'}})"
            )
            # Link knowledge points (match q first so the MERGEs can reference it)
            for kp_name in knowledge:
                kp_id = next((k for k, v in self.knowledge_map.items() if v == kp_name), None)
                if kp_id:
                    self.graph.run(f"""
                        MATCH (q:Question {{id: '{question_id}'}})
                        MERGE (kp:KnowledgePoint {{id: '{kp_id}'}})
                        MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)
                    """)
            # Link literacy points
            for lit_name in literacy:
                lit_id = next((k for k, v in self.literacy_map.items() if v == lit_name), None)
                if lit_id:
                    self.graph.run(f"""
                        MATCH (q:Question {{id: '{question_id}'}})
                        MERGE (lp:LiteracyNode {{value: '{lit_id}'}})
                        MERGE (q)-[:DEVELOPS_LITERACY]->(lp)
                    """)
        except Exception as e:
            print(f"❌ Storage failed: {str(e)}")

# ================== Core logic module ==================
class ProblemAnalyzer:
    """Problem analysis engine."""

    def __init__(self, content: str):
        self.original = content
        self.content = self._preprocess(content)
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
        self.llm = LLMClient()

    def _preprocess(self, text: str) -> str:
        """Text preprocessing: keep word characters and CJK, truncate to the limit."""
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]

    def analyze(self) -> dict:
        """Run the analysis pipeline."""
        # Local rule-based analysis
        local_result = LocalKnowledgeBase.analyze(self.content)
        # Optional LLM-enhanced analysis
        llm_result = self.llm.analyze_problem(self.original)
        # Merge the two results (deduplicated, capped per field)
        return {
            "problem_id": self.question_id,
            "problem_types": list(set(
                local_result.get('problem_types', []) +
                llm_result.get('problem_types', [])
            ))[:3],
            "knowledge_points": list(set(
                local_result.get('knowledge_points', []) +
                llm_result.get('knowledge_points', [])
            ))[:2],
            "literacy_points": list(set(
                local_result.get('literacy_points', []) +
                llm_result.get('literacy_points', [])
            ))[:2]
        }

    def execute(self):
        """Run the full pipeline: analyze, report, store."""
        print(f"\n🔍 Analyzing question: {self.original[:50]}...")
        analysis = self.analyze()
        print("\n📊 Analysis report:")
        print(f"  Problem types:    {analysis.get('problem_types', [])}")
        print(f"  Knowledge points: {analysis.get('knowledge_points', [])}")
        print(f"  Literacy points:  {analysis.get('literacy_points', [])}")
        # Store in the knowledge graph
        self.kg.store_analysis(
            question_id=analysis['problem_id'],
            content=self.content,
            knowledge=analysis.get('knowledge_points', []),
            literacy=analysis.get('literacy_points', [])
        )
        print("✅ Data stored")

# ================== Test cases ==================
if __name__ == '__main__':
    # Split the exam file and verify the result with the LLM-Cypher pipeline
    question_blocks = split_questions('ShiTi.md')
    for i, block in enumerate(question_blocks, 1):
        print(f"Question block {i}:")
        print("-" * 50)
        kg = KnowledgeGraph(block)
        if cypher := kg.run():
            print("Generated Cypher:\n", cypher)
            kg.graph.run(cypher)
            print("Linked relations:")
            for record in kg.query_relations():
                print(f"Knowledge point: {record['knowledge_name']} ({record['knowledge_id']})")
                print(f"Literacy point: {record['literacy_title']} ({record['literacy_id']})")

    # Run the rule-engine + optional-LLM pipeline on built-in test cases
    test_cases = [
        "小明用50元买了3本笔记本每本8元还剩多少钱",
        "甲乙两车相距300公里甲车速度60km/h乙车40km/h几小时后相遇",
        "一项工程甲队单独做需要10天乙队需要15天两队合作需要多少天",
        "一个长方形长8cm宽5cm求面积和周长",
        "含盐20%的盐水500克要配成15%的盐水,需加水多少克?"
    ]
    for question in test_cases:
        print("\n" + "=" * 80)
        analyzer = ProblemAnalyzer(question)
        analyzer.execute()