|
|
|
@ -36,8 +36,8 @@ class KnowledgeGraph:
|
|
|
|
|
1. 只使用以下存在的知识点(格式:ID:名称):
|
|
|
|
|
{", ".join([f"{k}:{v}" for k, v in zip(example_ids, example_names)])}...
|
|
|
|
|
共{len(self.knowledge_points)}个可用知识点
|
|
|
|
|
|
|
|
|
|
2. 按此格式生成Cypher:
|
|
|
|
|
2. 题目可能包含多个知识点,让仔细检查。
|
|
|
|
|
3. 按此格式生成Cypher:
|
|
|
|
|
MERGE (q:Question {{id: "{self.question_id}"}})
|
|
|
|
|
SET q.content = "题目内容"
|
|
|
|
|
WITH q
|
|
|
|
@ -45,66 +45,149 @@ MATCH (kp:KnowledgePoint {{id: "知识点ID"}})
|
|
|
|
|
MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)"""
|
|
|
|
|
|
|
|
|
|
def _clean_cypher(self, code: str) -> str:
    """Sanitize model-generated Cypher before it is executed.

    The first fenced code block (```cypher ... ``` or a bare fence) is
    extracted from ``code`` and processed in five steps:

    1. Basic cleaning: strip ``//`` comments and blank lines, drop every
       line containing ``CREATE``, and move the ``MERGE (q:Question``
       line to the front.
    2. Detect question types by looking for keywords in ``self.content``.
    3. Validate every ``MATCH (kp:KnowledgePoint`` id against
       ``self.knowledge_points`` (case-insensitively) and rewrite the id
       in upper case. A rejected MATCH is removed together with the
       relationship MERGE that depended on it.
    4. For each detected type not covered by an already linked knowledge
       point, append a link to the matching knowledge point with the
       shortest name.
    5. Syntax fix-ups: make sure ``WITH`` follows the Question MERGE,
       collapse consecutive ``WITH`` lines and drop a trailing ``WITH``.

    Args:
        code: Raw text that is expected to contain a fenced Cypher block.

    Returns:
        The cleaned statements joined by newlines, or ``""`` when no code
        block is found or any error occurs while cleaning.
    """
    content_keywords = {
        '行程问题': ['相遇', '相向而行', '追及', '速度', '路程'],
        '几何问题': ['面积', '体积', '周长', '三角形', '长方体'],
        '分数运算': ['分数', '百分比', '%', '分之']
    }
    id_pattern = re.compile(r"id: ['\"](.*?)['\"]")

    def strip_line_comment(text):
        """Cut a trailing // comment, ignoring // inside quoted strings."""
        quote = None
        pos = 0
        while pos < len(text):
            ch = text[pos]
            if quote is not None:
                if ch == '\\':
                    # Skip the escaped character so \" does not end the string.
                    pos += 2
                    continue
                if ch == quote:
                    quote = None
            elif ch in ('"', "'"):
                quote = ch
            elif text.startswith('//', pos):
                return text[:pos]
            pos += 1
        return text

    try:
        # Extract the fenced code block; only the first one is used.
        cypher_block = re.findall(r"```(?:cypher)?\n(.*?)```", code, re.DOTALL)
        if not cypher_block:
            print("未检测到Cypher代码块")
            return ""

        # Upper-cased id -> name, so validation and name lookups agree
        # regardless of how the ids are cased in self.knowledge_points.
        names_by_upper_id = {
            str(k).upper(): v for k, v in self.knowledge_points.items()
        }

        # === Step 1: basic cleaning ===
        safe = []
        has_question = False
        for raw_line in cypher_block[0].split('\n'):
            clean_line = strip_line_comment(raw_line).strip()
            if not clean_line:
                continue

            # Block CREATE operations (deliberately broad substring check).
            if 'CREATE' in clean_line.upper():
                print(f"阻止CREATE操作: {clean_line}")
                continue

            # Force the Question node to the very front.
            if 'MERGE (q:Question' in clean_line:
                has_question = True
                safe.insert(0, clean_line)
                continue

            safe.append(clean_line)

        # === Step 2: detect question types ===
        detected_types = []
        for pattern, keys in content_keywords.items():
            if any(k in self.content for k in keys):
                detected_types.append(pattern)
                print(f"检测到题目类型: {pattern}")

        # === Step 3: validate knowledge point ids ===
        # A new list is built instead of calling remove()/index() on the
        # list being scanned; those always hit the first equal element and
        # mis-handle duplicate lines.
        validated = []
        linked_ids = []
        kp_rejected = False
        for line in safe:
            if 'MATCH (kp:KnowledgePoint' in line:
                match = id_pattern.search(line)
                if not match:
                    print(f"无效的MATCH语句: {line}")
                    kp_rejected = True
                    continue

                original_id = match.group(1)
                upper_id = original_id.upper()

                if upper_id not in names_by_upper_id:
                    print(f"忽略无效知识点ID: {original_id}")
                    kp_rejected = True
                    continue

                kp_rejected = False
                # Rewrite only the matched id literal, not every occurrence
                # of the same text elsewhere in the line.
                validated.append(
                    line[:match.start(1)] + upper_id + line[match.end(1):]
                )
                linked_ids.append(upper_id)
                continue

            if kp_rejected and '(kp)' in line and 'MERGE' in line.upper():
                # The MATCH that bound kp was dropped; keeping this MERGE
                # would create an anonymous node for the unbound variable.
                print(f"移除孤立的关系语句: {line}")
                continue

            validated.append(line)
        safe = validated

        # === Step 4: supplement missing knowledge points ===
        for dtype in detected_types:
            type_exists = any(
                dtype in str(names_by_upper_id.get(kp_id, ''))
                for kp_id in linked_ids
            )
            if type_exists:
                continue

            candidates = [
                (k, v) for k, v in self.knowledge_points.items()
                if v and dtype in str(v)
            ]
            if not candidates:
                print(f"未找到匹配的{dtype}知识点")
                continue

            # Shortest name wins; ties keep dictionary order.
            target_id, target_name = min(
                candidates, key=lambda item: len(str(item[1]))
            )
            print(f"补充知识点: {target_id} - {target_name}")
            target_upper = str(target_id).upper()
            safe.extend([
                "WITH q",
                f"MATCH (kp:KnowledgePoint {{id: \"{target_upper}\"}})",
                "MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)"
            ])
            linked_ids.append(target_upper)

        # === Step 5: syntax fix-ups ===
        # Make sure the Question MERGE is immediately followed by WITH.
        if has_question:
            for i, line in enumerate(safe):
                if 'MERGE (q:Question' in line:
                    if i + 1 >= len(safe) or not safe[i + 1].startswith('WITH'):
                        safe.insert(i + 1, "WITH q")
                    break

        # Collapse runs of consecutive WITH lines into one.
        final_safe = []
        for line in safe:
            if (line.startswith('WITH') and final_safe
                    and final_safe[-1].startswith('WITH')):
                continue
            final_safe.append(line)

        # A query must not end with WITH; drop any dangling one.
        while final_safe and final_safe[-1].startswith('WITH'):
            final_safe.pop()

        return '\n'.join(final_safe)

    except Exception as e:
        # Best-effort boundary: report the problem and return no Cypher.
        print(f"清洗Cypher时发生错误: {str(e)}")
        return ""
|
|
|
|
|
|
|
|
|
|
def run(self) -> str:
|
|
|
|
|
"""执行知识点关联流程"""
|
|
|
|
@ -158,6 +241,7 @@ MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)"""
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
test_case = """【时间问题】甲乙两车从相距240公里的两地同时出发相向而行,甲车时速60公里,乙车时速40公里,几小时后相遇?"""
|
|
|
|
|
kg = KnowledgeGraph(test_case)
|
|
|
|
|
#print("原始知识点库:", kg.knowledge_points) # 查看加载的知识点
|
|
|
|
|
cypher = kg.run()
|
|
|
|
|
if cypher:
|
|
|
|
|
# 插入数据
|
|
|
|
@ -167,14 +251,17 @@ if __name__ == '__main__':
|
|
|
|
|
else:
|
|
|
|
|
print("未生成有效Cypher")
|
|
|
|
|
|
|
|
|
|
# # 临时诊断
|
|
|
|
|
# print("当前知识库中是否存在该ID:",
|
|
|
|
|
# 'f0333b305f7246b5a06d03d4e3ff55a9' in kg.knowledge_points)
|
|
|
|
|
#
|
|
|
|
|
# # 直接查询数据库
|
|
|
|
|
# test_cypher = '''
|
|
|
|
|
# MATCH (kp:KnowledgePoint)
|
|
|
|
|
# WHERE kp.id = 'f0333b305f7246b5a06d03d4e3ff55a9'
|
|
|
|
|
# RETURN kp.id, kp.name
|
|
|
|
|
# '''
|
|
|
|
|
# print("直接查询结果:", kg.graph.run(test_cypher).data())
|
|
|
|
|
'''
|
|
|
|
|
# 基本可视化查询
|
|
|
|
|
MATCH path=(q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp)
|
|
|
|
|
RETURN path
|
|
|
|
|
|
|
|
|
|
# 带样式的可视化
|
|
|
|
|
MATCH (q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp)
|
|
|
|
|
RETURN q, kp
|
|
|
|
|
// 在浏览器中点击左侧样式图标,设置:
|
|
|
|
|
// - Question节点颜色:橙色
|
|
|
|
|
// - KnowledgePoint节点颜色:蓝色
|
|
|
|
|
// - 关系线宽:3px
|
|
|
|
|
'''
|
|
|
|
|
|
|
|
|
|