main
黄海 5 months ago
parent 0e2585110e
commit e431f7c354

@ -1,194 +1,162 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
数学题目分析系统 v5.0离线可用 数学题目分析系统 v6.1严格匹配
功能特性 功能特性
1. 本地规则引擎为主 + 大模型增强可选 1. 纯大模型分析
2. 自动Neo4j数据清洗 2. 流式响应处理
3. 完善的错误处理 3. 严格匹配已有节点
4. 详细的运行日志 4. 不创建新知识点/素养点
""" """
from Config import *
import hashlib
import json
import re import re
import json
import hashlib
from typing import Dict, List from typing import Dict, List
import jieba
import requests import requests
from py2neo import Graph from py2neo import Graph
from Config import * from Config import *
# 初始化分词器
jieba.initialize()
# ================== 配置区 ==================
class Config:
    """Runtime configuration.

    All credential-like values are re-exported from the external ``Config``
    module (brought in via ``from Config import *`` at the top of the file);
    only the timeout and size limits are defined here.
    """
    # Neo4j connection (values come from the star-imported Config module)
    NEO4J_URI = NEO4J_URI
    NEO4J_AUTH = NEO4J_AUTH

    # 大模型配置(示例为阿里云配置) — OpenAI-compatible endpoint
    MODEL_API_URL = MODEL_API_URL
    MODEL_API_KEY = MODEL_API_KEY
    MODEL_NAME = MODEL_NAME

    # 超时配置 (seconds)
    STREAM_TIMEOUT = 30  # 流式响应总超时
    CHUNK_TIMEOUT = 5    # 单次数据块等待超时

    # 系统参数
    MAX_CONTENT_LENGTH = 500  # max characters of problem text kept after preprocessing
# ================== 本地知识库 ==================
class LocalKnowledgeBase:
    """本地规则分析引擎 (local rule-based analyzer).

    A category fires when at least two of its keywords appear among the
    jieba-segmented words of the problem text.
    """

    # NOTE(review): three single-character keywords of 'arithmetic' were lost
    # when this file was recovered from a diff view — restore them from
    # version control before relying on that rule.
    RULES = {
        'arithmetic': {
            'keywords': ['还剩', '单价', '总价'],
            'knowledge': ['四则运算应用(购物问题)'],
            'literacy': ['数感培养']
        },
        'travel': {
            'keywords': ['相遇', '速度', '距离', '时间', '出发'],
            'knowledge': ['相遇问题解决方案'],
            'literacy': ['空间观念']
        },
        'work': {
            'keywords': ['合作', '效率', '工期', '完成', '单独'],
            'knowledge': ['工程合作效率计算'],
            'literacy': ['模型思想']
        },
        'geometry': {
            'keywords': ['面积', '周长', '体积', '平方', '立方'],
            'knowledge': ['几何图形面积计算'],
            'literacy': ['空间观念']
        },
        'ratio': {
            'keywords': ['百分比', '浓度', '稀释', '配比'],
            'knowledge': ['浓度问题配比计算'],
            'literacy': ['数据分析']
        }
    }

    @classmethod
    def analyze(cls, content: str) -> dict:
        """本地规则分析 — return matched problem types / knowledge / literacy points."""
        result = {
            'problem_types': [],
            'knowledge_points': [],
            'literacy_points': []
        }
        # Segment once; rule matching is then a cheap set intersection.
        words = set(jieba.cut(content))
        for ptype, config in cls.RULES.items():
            if len(words & set(config['keywords'])) >= 2:
                result['problem_types'].append(ptype)
                result['knowledge_points'].extend(config['knowledge'])
                result['literacy_points'].extend(config['literacy'])
        return result
# ================== 流式大模型客户端 ==================
class StreamLLMClient:
    """支持流式响应的大模型客户端 (streaming LLM client).

    Posts the problem text to an OpenAI-compatible chat-completions endpoint
    with ``stream=True``, concatenates the streamed delta chunks, and extracts
    the single JSON object the prompt asks the model to produce.
    """

    def __init__(self):
        self.base_url = Config.MODEL_API_URL
        self.headers = {
            "Authorization": f"Bearer {Config.MODEL_API_KEY}",
            "Content-Type": "application/json"
        }

    def analyze_problem(self, content: str) -> dict:
        """流式响应分析 — returns the parsed analysis dict, or {} on any failure."""
        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json={
                    "model": Config.MODEL_NAME,
                    "messages": [{
                        "role": "user",
                        "content": f"""请严格按JSON格式分析数学题目:
{{
    "problem_types": ["题型列表"],
    "knowledge_points": ["知识点名称(必须与数据库完全一致)"],
    "literacy_points": ["素养点名称(必须与数据库完全一致)"]
}}
题目:{content}"""
                    }],
                    "temperature": 0.2,
                    "stream": True
                },
                timeout=Config.STREAM_TIMEOUT,
                stream=True
            )
            response.raise_for_status()
            return self._process_stream(response)
        except requests.exceptions.RequestException as e:
            print(f"🌐 网络错误: {str(e)}")
            return {}
        except Exception as e:
            print(f"🔴 解析失败: {str(e)}")
            return {}

    def _process_stream(self, response) -> dict:
        """处理流式响应 — assemble SSE ``data:`` chunks, then parse the JSON object.

        Fixes over the previous revision: the ``[DONE]`` end-of-stream sentinel
        is recognized instead of being fed to ``json.loads``, and the bare
        ``except:`` clauses are narrowed to the exceptions that malformed
        chunks can actually raise.
        """
        full_content = ""
        for chunk in response.iter_lines():
            if not chunk:
                continue
            decoded_chunk = chunk.decode('utf-8')
            if not decoded_chunk.startswith("data:"):
                continue
            payload = decoded_chunk[5:].strip()
            if payload == "[DONE]":  # SSE end-of-stream sentinel
                break
            try:
                chunk_data = json.loads(payload)
                full_content += chunk_data['choices'][0]['delta'].get('content', '')
            except (json.JSONDecodeError, KeyError, IndexError, TypeError):
                continue  # skip malformed / keep-alive chunks
        # The model is prompted for one JSON object; grab the outermost braces.
        match = re.search(r'\{.*\}', full_content, re.DOTALL)
        if match:
            try:
                return json.loads(match.group())
            except json.JSONDecodeError:
                pass
        print("🔴 无法解析大模型输出")
        return {}
# ================== 知识图谱管理 ==================
class KnowledgeManager:
    """严格匹配型知识图谱管理器 (strict-matching knowledge-graph manager).

    Loads the existing KnowledgePoint / LiteracyNode lookup maps once at
    construction; analysis results are only ever linked to these pre-existing
    nodes, never used to create new ones.
    """

    def __init__(self):
        self.graph = Graph(Config.NEO4J_URI, auth=Config.NEO4J_AUTH)
        self.knowledge_map = self._load_knowledge()
        self.literacy_map = self._load_literacy()

    def _load_knowledge(self) -> Dict[str, str]:
        """加载知识点映射(id -> name)"""
        return {rec['n.id']: rec['n.name']
                for rec in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}

    def _load_literacy(self) -> Dict[str, str]:
        """加载素养点映射(value -> title)"""
        return {rec['n.value']: rec['n.title']
                for rec in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
def store_analysis(self, question_id: str, content: str, def store_analysis(self, question_id: str, content: str,
knowledge: List[str], literacy: List[str]): knowledge: List[str], literacy: List[str]):
"""事务化存储方法""" """使用参数化查询解决转义问题"""
tx = self.graph.begin() tx = self.graph.begin()
try: try:
# 转义特殊字符 # 使用参数化查询避免转义问题
safe_content = content.replace("'", "\\'") tx.run(
"MERGE (q:Question {id: $question_id}) "
# 创建/更新题目节点 "SET q.content = $content",
tx.run(f""" {
MERGE (q:Question {{id: '{question_id}'}}) "question_id": question_id,
SET q.content = '{safe_content}' "content": content
""") }
)
# 关联知识点
# 关联知识点(参数化版本)
for kp_name in knowledge: for kp_name in knowledge:
if kp_id := next((k for k, v in self.knowledge_map.items() if v == kp_name), None): if kp_id := next((k for k, v in self.knowledge_map.items() if v == kp_name), None):
tx.run(f""" tx.run(
MERGE (kp:KnowledgePoint {{id: '{kp_id}'}}) "MATCH (kp:KnowledgePoint {id: $kp_id}) "
WITH kp "MATCH (q:Question {id: $qid}) "
MATCH (q:Question {{id: '{question_id}'}}) "MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp)",
MERGE (q)-[:REQUIRES_KNOWLEDGE]->(kp) {"kp_id": kp_id, "qid": question_id}
""") )
# 关联素养点 # 关联素养点(参数化版本)
for lit_name in literacy: for lit_name in literacy:
if lit_id := next((k for k, v in self.literacy_map.items() if v == lit_name), None): if lit_id := next((k for k, v in self.literacy_map.items() if v == lit_name), None):
tx.run(f""" tx.run(
MERGE (lp:LiteracyNode {{value: '{lit_id}'}}) "MATCH (lp:LiteracyNode {value: $lit_id}) "
WITH lp "MATCH (q:Question {id: $qid}) "
MATCH (q:Question {{id: '{question_id}'}}) "MERGE (q)-[:DEVELOPS_LITERACY]->(lp)",
MERGE (q)-[:DEVELOPS_LITERACY]->(lp) {"lit_id": lit_id, "qid": question_id}
""") )
self.graph.commit(tx) self.graph.commit(tx)
print("✅ 数据存储成功") print("✅ 数据存储成功")
@ -199,42 +167,27 @@ class KnowledgeManager:
# ================== 核心分析引擎 ==================
class ProblemAnalyzer:
    """纯大模型分析引擎 (pure-LLM analysis engine).

    Wraps one problem statement: derives a stable id, delegates analysis to
    the streaming LLM client, and caps the result lists.
    """

    def __init__(self, content: str):
        self.original = content
        self.content = self._preprocess(content)
        # Stable 12-hex-char id derived from the raw problem text.
        self.question_id = hashlib.sha256(content.encode()).hexdigest()[:12]
        self.kg = KnowledgeManager()
        self.llm = StreamLLMClient()

    def _preprocess(self, text: str) -> str:
        """文本预处理 — keep word chars / CJK only, cap at MAX_CONTENT_LENGTH."""
        return re.sub(r'[^\w\u4e00-\u9fa5]', '', text)[:Config.MAX_CONTENT_LENGTH]

    def analyze(self) -> dict:
        """纯大模型分析 — at most 3 problem types, 2 knowledge points, 2 literacy points."""
        result = self.llm.analyze_problem(self.original)
        return {
            "problem_id": self.question_id,
            "problem_types": result.get('problem_types', [])[:3],
            "knowledge_points": result.get('knowledge_points', [])[:2],
            "literacy_points": result.get('literacy_points', [])[:2]
        }
def execute(self): def execute(self):
@ -243,7 +196,7 @@ class ProblemAnalyzer:
analysis = self.analyze() analysis = self.analyze()
print("\n📊 分析报告:") print("\n📊 大模型分析报告:")
print(f" 题型识别:{analysis.get('problem_types', [])}") print(f" 题型识别:{analysis.get('problem_types', [])}")
print(f" 推荐知识点:{analysis.get('knowledge_points', [])}") print(f" 推荐知识点:{analysis.get('knowledge_points', [])}")
print(f" 关联素养点:{analysis.get('literacy_points', [])}") print(f" 关联素养点:{analysis.get('literacy_points', [])}")
@ -261,10 +214,7 @@ class ProblemAnalyzer:
if __name__ == '__main__': if __name__ == '__main__':
test_cases = [ test_cases = [
"小明用50元买了3本笔记本每本8元还剩多少钱", "小明用50元买了3本笔记本每本8元还剩多少钱",
"甲乙两车相距300公里甲车速度60km/h乙车40km/h几小时后相遇", "甲乙两车相距300公里甲车速度60km/h乙车40km/h几小时后相遇"
"一项工程甲队单独做需要10天乙队需要15天两队合作需要多少天",
"一个长方形长8cm宽5cm求面积和周长",
"含盐20%的盐水500克要配成15%的盐水,需加水多少克?"
] ]
for question in test_cases: for question in test_cases:

Loading…
Cancel
Save