# -*- coding: utf-8 -*-
"""Link exam questions to KnowledgePoint nodes in Neo4j with the help of an LLM.

The module asks a chat model to produce Cypher that connects a question to
the knowledge points it tests, sanitises that Cypher, and (when run as a
script) executes it against the graph.
"""
import re
import hashlib
from py2neo import Graph
from openai import OpenAI
from Config import *


class KnowledgeGraph:
    """Associate a single question with knowledge points stored in Neo4j.

    Constructing an instance connects to Neo4j, loads every
    ``KnowledgePoint`` node into ``self.knowledge_points`` (id -> name) and
    creates the OpenAI-compatible client used by :meth:`run`.
    """

    # Keywords whose presence in the question text marks a coarse question type.
    _CONTENT_KEYWORDS = {
        '行程问题': ['相遇', '相向而行', '追及', '速度', '路程'],
        '几何问题': ['面积', '体积', '周长', '三角形', '长方体'],
        '分数运算': ['分数', '百分比', '%', '分之'],
    }
    # Fenced code block, optionally tagged "cypher".
    _CODE_BLOCK_RE = re.compile(r"```(?:cypher)?\n(.*?)```", re.DOTALL)
    # The quoted value of an ``id: "..."`` property inside a node pattern.
    _KP_ID_RE = re.compile(r"id: ['\"](.*?)['\"]")

    def __init__(self, content: str):
        """Store the question text and prepare the graph and LLM clients.

        Args:
            content: Full text of the question; its MD5 digest (first eight
                hex characters) becomes ``self.question_id``.
        """
        self.content = content
        self.question_id = hashlib.md5(content.encode()).hexdigest()[:8]
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
        self.knowledge_points = self._get_knowledge_points()
        self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)
        print("加载知识点数量:", len(self.knowledge_points))  # debug output

    def _get_knowledge_points(self) -> dict:
        """Return ``{id: name}`` for every KnowledgePoint node.

        IDs are kept exactly as stored (no case conversion). Any failure is
        printed and an empty dict is returned instead of raising.
        """
        try:
            rows = self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")
            return {row['n.id']: row['n.name'] for row in rows}
        except Exception as e:
            print("获取知识点失败:", str(e))
            return {}

    def _make_prompt(self) -> str:
        """Build the system prompt that asks the model for linking Cypher."""
        # NOTE(review): only the first five knowledge points are listed even
        # though the prompt tells the model to use only listed ones — confirm
        # whether the full catalogue should be sent instead.
        sample = list(self.knowledge_points.items())[:5]
        listing = ", ".join(f"{kp_id}:{name}" for kp_id, name in sample)
        return (
            "你是一个数学专家,请分析题目考查的知识点,严格:\n"
            "1. 只使用以下存在的知识点(格式:ID:名称):\n"
            f"{listing}...\n"
            f"共{len(self.knowledge_points)}个可用知识点\n"
            "2. 题目可能包含多个知识点,让仔细检查。\n"
            "3. 按此格式生成Cypher:\n"
            f'MERGE (q:Question {{id: "{self.question_id}"}})\n'
            'SET q.content = "题目内容"\n'
            "WITH q\n"
            'MATCH (kp:KnowledgePoint {id: "知识点ID"})\n'
            "MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)"
        )

    def _clean_cypher(self, code: str) -> str:
        """Sanitise model output and return executable Cypher (or ``""``).

        Steps, in order: extract the first fenced code block; strip comments,
        blank lines and any line mentioning CREATE; validate every
        ``MATCH (kp:KnowledgePoint ...)`` against the loaded knowledge
        points; append a knowledge point for each detected question type
        that is not covered yet; normalise ``WITH`` clauses.

        Args:
            code: Raw text returned by the model.

        Returns:
            Newline-joined Cypher statements, or ``""`` when no code block
            was found or any step raised.
        """
        try:
            blocks = self._CODE_BLOCK_RE.findall(code)
            if not blocks:
                print("未检测到Cypher代码块")
                return ""

            # Case-insensitive lookup that maps back to the ID as stored in
            # the graph, so the emitted MATCH uses the real ID.
            canonical_ids = {
                kp_id.upper(): kp_id
                for kp_id in self.knowledge_points
                if isinstance(kp_id, str)
            }

            lines, has_question = self._basic_clean(blocks[0])
            detected_types = self._detect_types()
            lines, linked_ids = self._validate_knowledge_lines(lines, canonical_ids)
            lines.extend(self._supplement_lines(detected_types, linked_ids))
            return '\n'.join(self._fix_with_clauses(lines, has_question))
        except Exception as e:
            # Deliberate best-effort boundary: a malformed answer must not
            # abort the caller, it simply yields no Cypher.
            print(f"清洗Cypher时发生错误: {str(e)}")
            return ""

    @staticmethod
    def _basic_clean(block: str) -> tuple:
        """Drop comments, blanks and CREATE lines; move the Question MERGE first.

        Returns:
            ``(lines, has_question)`` where ``has_question`` tells whether a
            ``MERGE (q:Question`` statement was present.
        """
        lines = []
        has_question = False
        for raw in block.split('\n'):
            stmt = raw.split('//')[0].strip()
            if not stmt:
                continue
            # NOTE(review): only CREATE is filtered; other write clauses in
            # the model output pass through unchanged.
            if 'CREATE' in stmt.upper():
                print(f"阻止CREATE操作: {stmt}")
                continue
            if 'MERGE (q:Question' in stmt:
                has_question = True
                lines.insert(0, stmt)
                continue
            lines.append(stmt)
        return lines, has_question

    def _detect_types(self) -> list:
        """Return the question types whose keywords occur in the question text."""
        detected = []
        for qtype, keywords in self._CONTENT_KEYWORDS.items():
            if any(word in self.content for word in keywords):
                detected.append(qtype)
                print(f"检测到题目类型: {qtype}")
        return detected

    def _validate_knowledge_lines(self, lines: list, canonical_ids: dict) -> tuple:
        """Keep only KnowledgePoint MATCH lines whose ID exists in the graph.

        A valid ID is rewritten to the exact ID stored in the graph (matching
        is case-insensitive, an exact match wins). When a MATCH is rejected,
        the following statements that reference ``(kp)`` are dropped too:
        with ``kp`` unbound, a relationship MERGE would create a new
        anonymous node instead of linking to a knowledge point.

        Returns:
            ``(kept_lines, linked_ids)`` with the stored IDs that remain linked.
        """
        kept = []
        linked_ids = []
        drop_dependents = False
        for line in lines:
            if 'MATCH (kp:KnowledgePoint' in line:
                match = self._KP_ID_RE.search(line)
                if not match:
                    print(f"无效的MATCH语句: {line}")
                    drop_dependents = True
                    continue

                original_id = match.group(1)
                if original_id in self.knowledge_points:
                    stored_id = original_id
                else:
                    stored_id = canonical_ids.get(original_id.upper())
                if stored_id is None:
                    print(f"忽略无效知识点ID: {original_id}")
                    drop_dependents = True
                    continue

                drop_dependents = False
                # Replace only the captured ID span; str.replace would also
                # rewrite other occurrences of the same text in the line.
                kept.append(line[:match.start(1)] + stored_id + line[match.end(1):])
                linked_ids.append(stored_id)
            elif drop_dependents and '(kp)' in line:
                print(f"忽略依赖无效知识点的语句: {line}")
            else:
                kept.append(line)
        return kept, linked_ids

    def _supplement_lines(self, detected_types: list, linked_ids: list) -> list:
        """Return extra statements linking a knowledge point per uncovered type.

        A type counts as covered when its name occurs in the name of any
        already linked knowledge point. Otherwise the knowledge point with
        the shortest matching name is linked.
        """
        linked_names = [
            str(self.knowledge_points.get(kp_id, '')) for kp_id in linked_ids
        ]
        extra = []
        for qtype in detected_types:
            if any(qtype in name for name in linked_names):
                continue

            candidates = [
                (kp_id, name)
                for kp_id, name in self.knowledge_points.items()
                if isinstance(kp_id, str) and name and qtype in str(name)
            ]
            if not candidates:
                print(f"未找到匹配的{qtype}知识点")
                continue

            # Shortest name first; ties keep the graph's original order.
            target_id, target_name = min(
                candidates, key=lambda item: len(str(item[1]))
            )
            print(f"补充知识点: {target_id} - {target_name}")
            extra.extend([
                "WITH q",
                f"MATCH (kp:KnowledgePoint {{id: \"{target_id}\"}})",
                "MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)",
            ])
        return extra

    @staticmethod
    def _fix_with_clauses(lines: list, has_question: bool) -> list:
        """Normalise WITH clauses so the statement sequence stays valid.

        Ensures the Question MERGE is followed by a WITH, collapses runs of
        consecutive WITH lines into the first one, and removes a trailing
        WITH because a Cypher query cannot end with one.
        """
        lines = list(lines)
        if has_question:
            for i, line in enumerate(lines):
                if 'MERGE (q:Question' in line:
                    if i + 1 >= len(lines) or not lines[i + 1].startswith('WITH'):
                        lines.insert(i + 1, "WITH q")
                    break

        result = []
        for line in lines:
            if line.startswith('WITH') and result and result[-1].startswith('WITH'):
                continue
            result.append(line)

        while result and result[-1].startswith('WITH'):
            result.pop()
        return result

    def run(self) -> str:
        """Ask the model for linking Cypher and return the sanitised result.

        Returns:
            The cleaned Cypher, or ``""`` when the model call fails or the
            answer contains nothing usable.
        """
        try:
            response = self.client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {
                        "role": "system",
                        "content": self._make_prompt()
                    },
                    {
                        "role": "user",
                        "content": f"题目内容:{self.content}\n请分析考查的知识点,只返回Cypher代码"
                    }
                ]
            )
            # The message content may be None; treat that as an empty answer.
            raw_cypher = response.choices[0].message.content or ""
            cleaned_cypher = self._clean_cypher(raw_cypher)
            if cleaned_cypher:
                print("验证通过的Cypher:\n", cleaned_cypher)
                return cleaned_cypher
            return ""
        except Exception as e:
            print("知识点分析失败:", str(e))
            return ""

    def query_related_knowledge(self):
        """Print and return the knowledge points linked to this question.

        Returns:
            A list of dicts with ``knowledge_id`` and ``knowledge_name``
            keys; an empty list when the query fails.
        """
        # question_id is a hex digest produced in __init__, so embedding it
        # in the query text cannot break out of the string literal.
        cypher = f"""
        MATCH (q:Question {{id: "{self.question_id}"}})-[:TESTS_KNOWLEDGE]->(kp)
        RETURN kp.id AS knowledge_id, kp.name AS knowledge_name
        """
        try:
            result = self.graph.run(cypher).data()
            if result:
                print(f"题目关联的知识点({self.question_id}):")
                for row in result:
                    print(f"- {row['knowledge_name']} (ID: {row['knowledge_id']})")
            else:
                print("该题目尚未关联知识点")
            return result
        except Exception as e:
            print("查询失败:", str(e))
            return []


def split_questions(file_path, limit=10):
    """Split a question file into individual question blocks.

    A block starts at ``<number>. 【...】`` and runs up to the next line that
    starts with ``<number>.`` or the end of the file, so it includes the
    answer text that follows the question.

    Args:
        file_path: Path of the UTF-8 text file to read.
        limit: Maximum number of blocks to return; ``None`` returns all.
            Defaults to 10.

    Returns:
        The stripped question blocks, in file order.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    pattern = r'(\d+\.\s+【.*?】.*?(?=\n\d+\.|\Z))'
    questions = [q.strip() for q in re.findall(pattern, content, re.DOTALL)]
    return questions if limit is None else questions[:limit]


def main():
    """Link every question of the sample file to its knowledge points."""
    question_blocks = split_questions('Backup/ShiTi.md')

    for i, block in enumerate(question_blocks, 1):
        print(f"第{i}题块:")
        print("-" * 50)
        kg = KnowledgeGraph(block)
        cypher = kg.run()
        if cypher:
            # Write the relationships, then read them back for confirmation.
            kg.graph.run(cypher)
            print("执行成功!关联知识点:")
            kg.query_related_knowledge()
        else:
            print("未生成有效Cypher")


if __name__ == '__main__':
    main()

# Handy Cypher snippets for inspecting the result in Neo4j Browser.
#
# Basic path visualisation:
#   MATCH path=(q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp)
#   RETURN path
#
# Styled visualisation:
#   MATCH (q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp)
#   RETURN q, kp
#   // In the browser, click the style icon on the left and set:
#   // - Question node colour: orange
#   // - KnowledgePoint node colour: blue
#   // - relationship line width: 3px