You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

268 lines
10 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
import re
import hashlib
from py2neo import Graph
from openai import OpenAI
from Config import *
class KnowledgeGraph:
    """Link a question to knowledge points in Neo4j via LLM-generated Cypher.

    On construction the instance derives a short question ID from the MD5 of
    the question text, connects to Neo4j, loads every ``KnowledgePoint`` node
    into ``self.knowledge_points`` (``{id: name}``) and creates the chat
    client. ``run()`` asks the model for Cypher, sanitises it and returns the
    cleaned statement text; executing it is left to the caller.
    """

    # Keyword heuristics: if any keyword occurs in the question text, the
    # question is assumed to belong to the category named by the key.
    _CONTENT_KEYWORDS = {
        '行程问题': ['相遇', '相向而行', '追及', '速度', '路程'],
        '几何问题': ['面积', '体积', '周长', '三角形', '长方体'],
        '分数运算': ['分数', '百分比', '%', '分之'],
    }

    # Fenced code block in the model reply (optionally tagged ``cypher``).
    _CODE_BLOCK_RE = re.compile(r"```(?:cypher)?\n(.*?)```", re.DOTALL)
    # Quoted value of an ``id:`` property inside a node pattern.
    _KP_ID_RE = re.compile(r"id: ['\"](.*?)['\"]")
    # A line consisting solely of a WITH clause, e.g. ``WITH q`` or ``WITH q, kp``.
    _BARE_WITH_RE = re.compile(r"WITH\s+\w+(?:\s*,\s*\w+)*\s*;?")

    def __init__(self, content: str):
        """Store the question text and set up the graph and model clients.

        Args:
            content: Full text of the question to analyse.
        """
        self.content = content
        # First 8 hex digits of the MD5 digest serve as the question ID.
        self.question_id = hashlib.md5(content.encode()).hexdigest()[:8]
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
        self.knowledge_points = self._get_knowledge_points()
        self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)
        print("加载知识点数量:", len(self.knowledge_points))  # debug output

    def _get_knowledge_points(self) -> dict:
        """Load all knowledge points as ``{id: name}``, keeping the stored ID casing.

        Returns:
            The mapping read from Neo4j, or an empty dict if the query fails
            (the failure is printed, not raised).
        """
        try:
            rows = self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")
            return {row['n.id']: row['n.name'] for row in rows}
        except Exception as e:
            print("获取知识点失败:", str(e))
            return {}

    def _make_prompt(self) -> str:
        """Build the system prompt used for knowledge-point recognition.

        The prompt lists up to five ``id:name`` pairs as examples, states the
        total number of available knowledge points and shows the Cypher
        template (with this question's ID filled in) the model should follow.
        """
        sample = list(self.knowledge_points.items())[:5]
        listing = ", ".join(f"{kp_id}:{name}" for kp_id, name in sample)
        return "\n".join([
            "你是一个数学专家,请分析题目考查的知识点,严格:",
            "1. 只使用以下存在的知识点格式ID:名称):",
            f"{listing}...",
            f"{len(self.knowledge_points)}个可用知识点",
            "2. 题目可能包含多个知识点,让仔细检查。",
            "3. 按此格式生成Cypher",
            f'MERGE (q:Question {{id: "{self.question_id}"}})',
            'SET q.content = "题目内容"',
            "WITH q",
            'MATCH (kp:KnowledgePoint {id: "知识点ID"})',
            "MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)",
        ])

    def _clean_cypher(self, code: str) -> str:
        """Sanitise model-generated Cypher and return the cleaned statement.

        Steps, in order:
          1. extract the first fenced code block;
          2. strip ``//`` comments and blank lines, drop lines containing
             CREATE, and move ``MERGE (q:Question`` lines to the front;
          3. detect question categories from keywords in the question text;
          4. validate every ``MATCH (kp:KnowledgePoint`` line against the
             loaded knowledge points, rewriting its ID to the stored casing;
          5. append a knowledge point for each detected category that is not
             yet covered;
          6. fix up WITH clauses.

        Args:
            code: Raw model reply, expected to contain a fenced Cypher block.

        Returns:
            Cleaned Cypher joined by newlines, or ``""`` when no code block is
            found or any error occurs (the error is printed).
        """
        # NOTE(review): only CREATE is filtered; other write clauses such as
        # DELETE or REMOVE coming from the model would pass through.
        try:
            blocks = self._CODE_BLOCK_RE.findall(code)
            if not blocks:
                print("未检测到Cypher代码块")
                return ""

            lines, has_question = self._strip_and_filter(blocks[0].split('\n'))
            detected_types = self._detect_question_types()
            lines, linked_ids = self._validate_knowledge_matches(lines)
            lines.extend(self._supplement_knowledge(detected_types, linked_ids))
            return '\n'.join(self._normalize_with_clauses(lines, has_question))
        except Exception as e:
            # Best-effort boundary: any failure yields "no usable Cypher".
            print(f"清洗Cypher时发生错误: {str(e)}")
            return ""

    @staticmethod
    def _strip_and_filter(raw_lines) -> tuple:
        """Remove comments, blanks and CREATE lines; hoist the Question MERGE.

        Returns:
            ``(lines, has_question)`` where ``has_question`` tells whether a
            ``MERGE (q:Question`` line was seen.
        """
        kept = []
        has_question = False
        for raw in raw_lines:
            clean_line = raw.split('//')[0].strip()
            if not clean_line:
                continue
            if 'CREATE' in clean_line.upper():
                print(f"阻止CREATE操作: {clean_line}")
                continue
            if 'MERGE (q:Question' in clean_line:
                # The Question node must be bound before anything uses ``q``.
                has_question = True
                kept.insert(0, clean_line)
                continue
            kept.append(clean_line)
        return kept, has_question

    def _detect_question_types(self) -> list:
        """Return the categories whose keywords occur in the question text."""
        detected = []
        for pattern, keys in self._CONTENT_KEYWORDS.items():
            if any(k in self.content for k in keys):
                detected.append(pattern)
                print(f"检测到题目类型: {pattern}")
        return detected

    def _validate_knowledge_matches(self, lines) -> tuple:
        """Validate knowledge-point MATCH lines and canonicalise their IDs.

        IDs are compared case-insensitively but written back exactly as they
        are stored, so the emitted MATCH actually hits the node. When a MATCH
        line is dropped, following lines that use ``(kp)`` are dropped too:
        with ``kp`` unbound, ``MERGE (q)-[...]->(kp)`` would create a stray
        node instead of linking to a knowledge point.

        Returns:
            ``(lines, linked_ids)`` with the surviving lines and the stored
            IDs referenced by the surviving MATCH lines.
        """
        canonical_by_upper = {kp_id.upper(): kp_id for kp_id in self.knowledge_points}
        validated = []
        linked_ids = []
        kp_dropped = False

        for line in lines:
            if 'MATCH (kp:KnowledgePoint' in line:
                match = self._KP_ID_RE.search(line)
                if not match:
                    print(f"无效的MATCH语句: {line}")
                    kp_dropped = True
                    continue

                original_id = match.group(1)
                if original_id in self.knowledge_points:
                    canonical_id = original_id
                else:
                    canonical_id = canonical_by_upper.get(original_id.upper())
                if canonical_id is None:
                    print(f"忽略无效知识点ID: {original_id}")
                    kp_dropped = True
                    continue

                kp_dropped = False
                # Replace only the quoted ID value, not every occurrence of
                # the same text elsewhere in the line.
                line = line[:match.start(1)] + canonical_id + line[match.end(1):]
                linked_ids.append(canonical_id)
            elif kp_dropped and '(kp)' in line:
                print(f"移除依赖无效知识点的语句: {line}")
                continue
            validated.append(line)

        return validated, linked_ids

    def _supplement_knowledge(self, detected_types, linked_ids) -> list:
        """Build extra statements for detected categories not yet linked.

        For each category with no linked knowledge point whose name contains
        the category, the knowledge point with the shortest matching name is
        chosen (first one wins on ties).

        Returns:
            Cypher lines to append (possibly empty).
        """
        extra = []
        linked = list(linked_ids)
        for dtype in detected_types:
            already_covered = any(
                dtype in str(self.knowledge_points.get(kp_id, ''))
                for kp_id in linked
            )
            if already_covered:
                continue

            candidates = [
                (kp_id, name) for kp_id, name in self.knowledge_points.items()
                if name and dtype in str(name)
            ]
            if not candidates:
                print(f"未找到匹配的{dtype}知识点")
                continue

            target_id, target_name = min(candidates, key=lambda item: len(str(item[1])))
            print(f"补充知识点: {target_id} - {target_name}")
            extra.extend([
                "WITH q",
                f"MATCH (kp:KnowledgePoint {{id: \"{target_id}\"}})",
                "MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)",
            ])
            linked.append(target_id)
        return extra

    def _normalize_with_clauses(self, lines, has_question) -> list:
        """Ensure WITH follows the Question MERGE and tidy WITH lines.

        Inserts ``WITH q`` right after the first ``MERGE (q:Question`` line if
        missing, collapses consecutive WITH lines into the first one, and
        removes bare WITH lines left at the very end, since a query may not
        conclude with WITH.
        """
        lines = list(lines)
        if has_question:
            for i, line in enumerate(lines):
                if 'MERGE (q:Question' in line:
                    if i + 1 >= len(lines) or not lines[i + 1].startswith('WITH'):
                        lines.insert(i + 1, "WITH q")
                    break

        result = []
        for line in lines:
            if line.startswith('WITH') and result and result[-1].startswith('WITH'):
                continue
            result.append(line)

        while result and self._BARE_WITH_RE.fullmatch(result[-1]):
            result.pop()
        return result

    def run(self) -> str:
        """Ask the model for Cypher linking this question to knowledge points.

        Returns:
            The sanitised Cypher, or ``""`` if the model call fails or nothing
            usable remains after cleaning. The statement is not executed here.
        """
        try:
            response = self.client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {
                        "role": "system",
                        "content": self._make_prompt()
                    },
                    {
                        "role": "user",
                        "content": f"题目内容:{self.content}\n请分析考查的知识点只返回Cypher代码"
                    }
                ]
            )
            # An empty reply is treated like a reply without a code block.
            raw_cypher = response.choices[0].message.content or ""
            cleaned_cypher = self._clean_cypher(raw_cypher)
            if cleaned_cypher:
                print("验证通过的Cypher\n", cleaned_cypher)
                return cleaned_cypher
            return ""
        except Exception as e:
            print("知识点分析失败:", str(e))
            return ""

    def query_related_knowledge(self):
        """Print and return the knowledge points linked to this question.

        Returns:
            A list of dicts with keys ``knowledge_id`` and ``knowledge_name``;
            an empty list if the query fails (the failure is printed).
        """
        cypher = f"""
        MATCH (q:Question {{id: "{self.question_id}"}})-[:TESTS_KNOWLEDGE]->(kp)
        RETURN kp.id AS knowledge_id, kp.name AS knowledge_name
        """
        try:
            result = self.graph.run(cypher).data()
            if result:
                print(f"题目关联的知识点({self.question_id}")
                for row in result:
                    print(f"- {row['knowledge_name']} (ID: {row['knowledge_id']})")
            else:
                print("该题目尚未关联知识点")
            return result
        except Exception as e:
            print("查询失败:", str(e))
            return []
# Manual smoke test: generate Cypher for one sample question, execute it
# against the graph and print the knowledge points that ended up linked.
if __name__ == '__main__':
    sample_question = """【时间问题】甲乙两车从相距240公里的两地同时出发相向而行甲车时速60公里乙车时速40公里几小时后相遇"""
    linker = KnowledgeGraph(sample_question)
    generated = linker.run()
    if not generated:
        print("未生成有效Cypher")
    else:
        # Write the question node and its relationships to the graph.
        linker.graph.run(generated)
        print("执行成功!关联知识点:")
        # Read the links back to confirm what was stored.
        linker.query_related_knowledge()
'''
// 基本可视化查询
MATCH path=(q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp)
RETURN path
// 带样式的可视化
MATCH (q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp)
RETURN q, kp
// 在浏览器中点击左侧样式图标,设置:
// - Question节点颜色橙色
// - KnowledgePoint节点颜色蓝色
// - 关系线宽3px
'''