main
黄海 5 months ago
parent 623fd11a53
commit e0c4779627

@ -5,283 +5,145 @@ from py2neo import Graph
from openai import OpenAI from openai import OpenAI
from Config import * from Config import *
# 切割试题
def split_questions(file_path, limit=10):
    """Split a question-bank text file into individual question blocks.

    A block starts at a numbered heading of the form ``N. 【tag】`` and runs
    (answer lines included) up to, but not including, the next line that
    begins with ``digits.``, or to the end of the file.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.
        limit: Maximum number of blocks to return. Defaults to 10, the value
            that used to be hard-coded; pass ``None`` to return every block.

    Returns:
        A list of question blocks in file order, each with leading and
        trailing whitespace stripped.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()
    # Match one question block (answer included). DOTALL lets ".*?" cross
    # newlines so the answer lines stay attached to their question.
    pattern = r'(\d+\.\s+【.*?】.*?(?=\n\d+\.|\Z))'
    questions = re.findall(pattern, content, re.DOTALL)
    # NOTE(review): a block only ends at the next line starting with
    # "digits.", so any text between two questions (e.g. a section heading)
    # stays attached to the end of the preceding block.
    cleaned_questions = [q.strip() for q in questions]
    # Slicing with None returns the full list, so limit=None means "no cap".
    return cleaned_questions[:limit]
class KnowledgeGraph: class KnowledgeGraph:
def __init__(self, content: str): def __init__(self, content: str):
self.content = content self.content = content
self.question_id = hashlib.md5(content.encode()).hexdigest()[:8] self.question_id = hashlib.md5(content.encode()).hexdigest()[:8]
self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH) self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
# 双数据源加载
self.knowledge_points = self._get_knowledge_points() self.knowledge_points = self._get_knowledge_points()
self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL) self.literacy_points = self._get_literacy_points()
print(f"已加载知识点:{len(self.knowledge_points)}个,素养点:{len(self.literacy_points)}")
# self.knowledge_points = self._get_knowledge_points() self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)
print("加载知识点数量:", len(self.knowledge_points)) # 添加调试信息
def _get_knowledge_points(self) -> dict: def _get_knowledge_points(self) -> dict:
"""保持ID原始大小写"""
try: try:
# 移除lower()转换 return {row['n.id']: row['n.name']
return {row['n.id']: row['n.name'] # 直接使用原始ID
for row in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")} for row in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}
except Exception as e: except Exception as e:
print(f"获取知识点失败:", str(e)) print(f"知识点加载失败:{str(e)}")
return {} return {}
def _make_prompt(self) -> str: def _get_literacy_points(self) -> dict:
"""生成知识点识别专用提示词"""
example_ids = list(self.knowledge_points.keys())[:5]
example_names = [self.knowledge_points[k] for k in example_ids]
return f"""你是一个数学专家,请分析题目考查的知识点,严格:
1. 只使用以下存在的知识点格式ID:名称
{", ".join([f"{k}:{v}" for k, v in zip(example_ids, example_names)])}...
{len(self.knowledge_points)}个可用知识点
2. 题目可能包含多个知识点让仔细检查
3. 按此格式生成Cypher
MERGE (q:Question {{id: "{self.question_id}"}})
SET q.content = "题目内容"
WITH q
MATCH (kp:KnowledgePoint {{id: "知识点ID"}})
MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)"""
def _clean_cypher(self, code: str) -> str:
"""完整Cypher清洗逻辑增强版"""
safe = []
content_keywords = {
'行程问题': ['相遇', '相向而行', '追及', '速度', '路程'],
'几何问题': ['面积', '体积', '周长', '三角形', '长方体'],
'分数运算': ['分数', '百分比', '%', '分之']
}
try: try:
# 提取代码块 return {row['n.value']: row['n.title']
cypher_block = re.findall(r"```(?:cypher)?\n(.*?)```", code, re.DOTALL) for row in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
if not cypher_block: except Exception as e:
print("未检测到Cypher代码块") print(f"素养点加载失败:{str(e)}")
return "" return {}
# 预处理配置
valid_ids_upper = [k.upper() for k in self.knowledge_points.keys()]
detected_types = []
raw_lines = cypher_block[0].split('\n')
has_question = False
# === 第一步:基础清洗 ===
for line in raw_lines:
# 清理注释和空白
clean_line = line.split('//')[0].strip()
if not clean_line:
continue
# 阻止CREATE操作
if 'CREATE' in clean_line.upper():
print(f"阻止CREATE操作: {clean_line}")
continue
# 强制Question节点在最前面
if 'MERGE (q:Question' in clean_line:
has_question = True
safe.insert(0, clean_line)
continue
safe.append(clean_line)
# === 第二步:检测题目类型 ===
for pattern, keys in content_keywords.items():
if any(k in self.content for k in keys):
detected_types.append(pattern)
print(f"检测到题目类型: {pattern}")
# === 第三步处理知识点ID ===
knowledge_lines = []
for line in safe.copy():
if 'MATCH (kp:KnowledgePoint' in line:
# 安全提取ID
match = re.search(r"id: ['\"](.*?)['\"]", line)
if not match:
print(f"无效的MATCH语句: {line}")
safe.remove(line)
continue
original_id = match.group(1) def _make_prompt(self) -> str:
upper_id = original_id.upper() kp_samples = "\n".join([f"{k}: {v}" for k, v in list(self.knowledge_points.items())[:3]])
lp_samples = "\n".join([f"{k}: {v}" for k, v in list(self.literacy_points.items())[:3]])
# 验证ID存在性 return f"""请分析题目考查的知识点和核心素养:
if upper_id not in valid_ids_upper:
print(f"忽略无效知识点ID: {original_id}")
safe.remove(line)
continue
# 替换为正确的大写ID 可用知识点ID:名称
new_line = line.replace(original_id, upper_id) {kp_samples}
safe[safe.index(line)] = new_line ...{len(self.knowledge_points)}个知识点
knowledge_lines.append(new_line)
# === 第四步:自动补充知识点 === 可用素养点ID:名称
for dtype in detected_types: {lp_samples}
# 安全获取已关联知识点ID ...{len(self.literacy_points)}个素养点
extracted_ids = []
for line in knowledge_lines:
try:
match = re.search(r"id: ['\"](.*?)['\"]", line)
if match:
kp_id = match.group(1).upper()
extracted_ids.append(kp_id)
except AttributeError:
continue
# 获取对应的知识点名称(确保为字符串) 生成要求
type_exists = any( 1. 必须使用上述ID
dtype in str(self.knowledge_points.get(kp_id, '')) 2. 按以下格式生成Cypher代码
for kp_id in extracted_ids
)
if not type_exists: MERGE (q:Question {{id: "{self.question_id}"}})
# 查找匹配的知识点(添加空值过滤) SET q.content = "题目内容"
candidates = [ WITH q
(k, v) for k, v in self.knowledge_points.items() MATCH (kp:KnowledgePoint {{id: "知识点ID"}})
if v and dtype in str(v) # 确保v是字符串 MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)
and k.upper() in valid_ids_upper WITH q
] MATCH (lp:LiteracyNode {{value: "素养点ID"}})
MERGE (q)-[:RELATES_TO_LITERACY]->(lp)"""
# 按名称匹配度排序 def _clean_cypher(self, code: str) -> str:
candidates.sort(key=lambda x: ( valid_kp_ids = [k.upper() for k in self.knowledge_points.keys()]
dtype in x[1], # 优先完全匹配 valid_lp_ids = [k.upper() for k in self.literacy_points.keys()]
-len(x[1]) # 次优先名称长度短的
), reverse=True)
if candidates: cleaned = []
target_id, target_name = candidates[0] lines = [line.strip() for line in code.split('\n') if line.strip()]
print(f"补充知识点: {target_id} - {target_name}")
safe.extend([
"WITH q",
f"MATCH (kp:KnowledgePoint {{id: \"{target_id.upper()}\"}})",
"MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)"
])
else:
print(f"未找到匹配的{dtype}知识点")
# === 第五步:语法修正 === for line in lines:
# 确保Question节点后紧跟WITH # 处理知识点匹配
if has_question: if 'MATCH (kp:KnowledgePoint' in line:
for i, line in enumerate(safe): if match := re.search(r'id: ["\'](.*?)["\']', line):
if 'MERGE (q:Question' in line: kp_id = match.group(1).upper()
# 检查下一条是否是WITH if kp_id in valid_kp_ids:
if i + 1 >= len(safe) or not safe[i + 1].startswith('WITH'): cleaned.append(line.replace(match.group(1), kp_id))
safe.insert(i + 1, "WITH q")
break
# 移除重复的WITH语句 # 处理素养点匹配
final_safe = [] elif 'MATCH (lp:LiteracyNode' in line:
prev_was_with = False if match := re.search(r'value: ["\'](.*?)["\']', line):
for line in safe: lp_id = match.group(1).upper()
if line.startswith('WITH'): if lp_id in valid_lp_ids:
if not prev_was_with: cleaned.append(line.replace(match.group(1), lp_id))
final_safe.append(line)
prev_was_with = True
else:
final_safe.append(line)
prev_was_with = False
return '\n'.join(final_safe) # 保留其他合法语句
elif line.startswith(('MERGE', 'WITH', 'SET')):
cleaned.append(line)
except Exception as e: return '\n'.join(cleaned)
print(f"清洗Cypher时发生错误: {str(e)}")
return ""
def run(self) -> str: def run(self) -> str:
"""执行知识点关联流程"""
try: try:
response = self.client.chat.completions.create( response = self.client.chat.completions.create(
model=MODEL_NAME, model=MODEL_NAME,
messages=[ messages=[
{ {"role": "system", "content": self._make_prompt()},
"role": "system", {"role": "user", "content": f"题目内容:{self.content}"}
"content": self._make_prompt()
},
{
"role": "user",
"content": f"题目内容:{self.content}\n请分析考查的知识点只返回Cypher代码"
}
] ]
) )
return self._clean_cypher(response.choices[0].message.content)
raw_cypher = response.choices[0].message.content
cleaned_cypher = self._clean_cypher(raw_cypher)
if cleaned_cypher:
print("验证通过的Cypher\n", cleaned_cypher)
return cleaned_cypher
return ""
except Exception as e: except Exception as e:
print("知识点分析失败:", str(e)) print(f"分析失败:{str(e)}")
return "" return ""
def query_related_knowledge(self): def query_relations(self):
"""查询题目关联的知识点"""
cypher = f""" cypher = f"""
MATCH (q:Question {{id: "{self.question_id}"}})-[:TESTS_KNOWLEDGE]->(kp) MATCH (q:Question {{id: "{self.question_id}"}})
RETURN kp.id AS knowledge_id, kp.name AS knowledge_name OPTIONAL MATCH (q)-[:TESTS_KNOWLEDGE]->(kp)
""" OPTIONAL MATCH (q)-[:RELATES_TO_LITERACY]->(lp)
try: RETURN
result = self.graph.run(cypher).data() kp.id AS knowledge_id,
if result: kp.name AS knowledge_name,
print(f"题目关联的知识点({self.question_id}") lp.value AS literacy_id,
for row in result: lp.title AS literacy_title"""
print(f"- {row['knowledge_name']} (ID: {row['knowledge_id']})") return self.graph.run(cypher).data()
else:
print("该题目尚未关联知识点")
return result # 使用示例
except Exception as e:
print("查询失败:", str(e))
return []
# 切割试题
def split_questions(file_path, limit=10):
    """Read a question file and return its questions as separate text blocks.

    Each block begins at a heading shaped like ``N. 【tag】`` and extends,
    together with its answer lines, until just before the next line that
    starts with ``digits.`` or until the end of the file.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.
        limit: Upper bound on the number of blocks returned. The default of
            10 matches the previously hard-coded cap; ``None`` disables it.

    Returns:
        The whitespace-stripped question blocks, in the order they appear
        in the file.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()
    # The regex captures a whole question block including its answer;
    # re.DOTALL is required because a block spans several lines.
    pattern = r'(\d+\.\s+【.*?】.*?(?=\n\d+\.|\Z))'
    questions = re.findall(pattern, content, re.DOTALL)
    # NOTE(review): the lookahead stops only at a line starting with
    # "digits.", so headings placed between questions end up at the tail of
    # the previous block.
    cleaned_questions = [q.strip() for q in questions]
    # A None limit slices to the end, i.e. returns every block found.
    return cleaned_questions[:limit]
# 测试用例
if __name__ == '__main__': if __name__ == '__main__':
# 分段读入题目 question_blocks = split_questions('ShiTi.md')
question_blocks = split_questions('Backup/ShiTi.md')
# 验证分割结果 # 验证分割结果
for i, block in enumerate(question_blocks, 1): for i, block in enumerate(question_blocks, 1):
print(f"{i}题块:") print(f"{i}题块:")
print("-" * 50) print("-" * 50)
kg = KnowledgeGraph(block) kg = KnowledgeGraph(block)
cypher = kg.run()
if cypher:
# 插入数据
kg.graph.run(cypher)
print("执行成功!关联知识点:")
kg.query_related_knowledge() # 新增查询
else:
print("未生成有效Cypher")
''' if cypher := kg.run():
# 基本可视化查询 print("生成的Cypher:\n", cypher)
MATCH path=(q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp) kg.graph.run(cypher)
RETURN path print("关联结果:")
for record in kg.query_relations():
# 带样式的可视化 print(f"知识点:{record['knowledge_name']} ({record['knowledge_id']})")
MATCH (q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp) print(f"素养点:{record['literacy_title']} ({record['literacy_id']})")
RETURN q, kp
// 在浏览器中点击左侧样式图标设置
// - Question节点颜色橙色
// - KnowledgePoint节点颜色蓝色
// - 关系线宽3px
'''

@ -0,0 +1,32 @@
### 三年级
1. 【购物计算】小明用50元买了3本笔记本,每本8元,还剩多少钱?
- 答案:50 - 3×8 = 26元
2. 【乘法应用】学校食堂每天消耗15袋大米,一周5天需要准备多少袋?
- 答案:15×5 = 75袋
3. 【周长计算】一个正方形花坛边长3米,要在四周围栅栏,需要多长的栅栏?
- 答案:3×4 = 12米
### 四年级
4. 【四则运算】图书馆原有图书1250本,上午借出368本,下午归还195本,现有多少本?
- 答案:1250 - 368 + 195 = 1077本
5. 【面积问题】长方形果园长25米,宽比长短7米,这个果园的面积是多少?
- 答案:25×(25-7) = 450平方米
6. 【时间问题】甲乙两车从相距240公里的两地同时出发,相向而行,甲车时速60公里,乙车时速40公里,几小时后相遇?
- 答案:240÷(60+40) = 2.4小时
### 五年级
7. 【分数运算】果汁店第一天卖出3/4吨橙汁,第二天卖出1/2吨苹果汁,两天共卖出多少吨?
- 答案:3/4 + 1/2 = 5/4吨
8. 【小数应用】文具店钢笔单价12.5元,买4支送1支,买20支实际要付多少钱?
- 答案:(20÷5)×4×12.5 = 200元
9. 【体积计算】长方体游泳池长25米宽12米深1.8米,这个游泳池最多能装多少立方米水?
- 答案:25×12×1.8 = 540立方米
10. 【比例问题】妈妈给小明和妹妹36元零花钱,按5:4分配,小明能得多少元?
- 答案:36÷(5+4)×5 = 20元
Loading…
Cancel
Save