diff --git a/AI/Neo4j/Backup/K3_Start.py b/AI/Neo4j/Backup/K3_Start.py index c9f254ea..7c771f88 100644 --- a/AI/Neo4j/Backup/K3_Start.py +++ b/AI/Neo4j/Backup/K3_Start.py @@ -36,7 +36,7 @@ if __name__ == '__main__': executor.execute_cypher_text(init_script) # 分段读入题目 - question_blocks = split_questions('../ShiTi.md') + question_blocks = split_questions('ShiTi.md') # 验证分割结果 for i, block in enumerate(question_blocks, 1): diff --git a/AI/Neo4j/ShiTi.md b/AI/Neo4j/Backup/ShiTi.md similarity index 100% rename from AI/Neo4j/ShiTi.md rename to AI/Neo4j/Backup/ShiTi.md diff --git a/AI/Neo4j/InputShiTi.py b/AI/Neo4j/InputShiTi.py index aa46a770..76e2a769 100644 --- a/AI/Neo4j/InputShiTi.py +++ b/AI/Neo4j/InputShiTi.py @@ -36,8 +36,8 @@ class KnowledgeGraph: 1. 只使用以下存在的知识点(格式:ID:名称): {", ".join([f"{k}:{v}" for k, v in zip(example_ids, example_names)])}... 共{len(self.knowledge_points)}个可用知识点 - -2. 按此格式生成Cypher: +2. 题目可能包含多个知识点,让仔细检查。 +3. 按此格式生成Cypher: MERGE (q:Question {{id: "{self.question_id}"}}) SET q.content = "题目内容" WITH q @@ -45,66 +45,149 @@ MATCH (kp:KnowledgePoint {{id: "知识点ID"}}) MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)""" def _clean_cypher(self, code: str) -> str: - """完整清洗逻辑""" + """完整Cypher清洗逻辑(增强版)""" safe = [] - cypher_block = re.findall(r"```(?:cypher)?\n(.*?)```", code, re.DOTALL) - if not cypher_block: - return "" + content_keywords = { + '行程问题': ['相遇', '相向而行', '追及', '速度', '路程'], + '几何问题': ['面积', '体积', '周长', '三角形', '长方体'], + '分数运算': ['分数', '百分比', '%', '分之'] + } + + try: + # 提取代码块 + cypher_block = re.findall(r"```(?:cypher)?\n(.*?)```", code, re.DOTALL) + if not cypher_block: + print("未检测到Cypher代码块") + return "" + + # 预处理配置 + valid_ids_upper = [k.upper() for k in self.knowledge_points.keys()] + detected_types = [] + raw_lines = cypher_block[0].split('\n') + has_question = False + + # === 第一步:基础清洗 === + for line in raw_lines: + # 清理注释和空白 + clean_line = line.split('//')[0].strip() + if not clean_line: + continue + + # 阻止CREATE操作 + if 'CREATE' in clean_line.upper(): + print(f"阻止CREATE操作: {clean_line}") + continue + + # 强制Question节点在最前面 + if 'MERGE (q:Question' in clean_line: + has_question = True + safe.insert(0, clean_line) + continue + + safe.append(clean_line) + + # === 第二步:检测题目类型 === + for pattern, keys in content_keywords.items(): + if any(k in self.content for k in keys): + detected_types.append(pattern) + print(f"检测到题目类型: {pattern}") - # 预处理:获取所有知识点的规范大写形式 - valid_ids_upper = [k.upper() for k in self.knowledge_points.keys()] - - has_question = False - for line in cypher_block[0].split('\n'): - # 清理注释和空白 - line = line.split('//')[0].strip() - if not line: - continue - - # 阻止CREATE操作 - if 'CREATE' in line.upper(): - continue - - # 强制Question节点在最前面 - if 'MERGE (q:Question' in line: - has_question = True - safe.insert(0, line) - continue - - # 处理知识点匹配 - if 'MATCH (kp:KnowledgePoint' in line: - # 提取并验证ID - kp_id_match = re.search(r"id: ['\"](.*?)['\"]", line) - if kp_id_match: - original_id = kp_id_match.group(1) + # === 第三步:处理知识点ID === + knowledge_lines = [] + for line in safe.copy(): + if 'MATCH (kp:KnowledgePoint' in line: + # 安全提取ID + match = re.search(r"id: ['\"](.*?)['\"]", line) + if not match: + print(f"无效的MATCH语句: {line}") + safe.remove(line) + continue + + original_id = match.group(1) upper_id = original_id.upper() - # 验证存在性(不区分大小写) + # 验证ID存在性 if upper_id not in valid_ids_upper: print(f"忽略无效知识点ID: {original_id}") + safe.remove(line) continue - # 替换为数据库实际存储的大写ID - line = line.replace(original_id, upper_id) + # 替换为正确的大写ID + new_line = line.replace(original_id, upper_id) + safe[safe.index(line)] = new_line + knowledge_lines.append(new_line) + + # === 第四步:自动补充知识点 === + for dtype in detected_types: + # 安全获取已关联知识点ID + extracted_ids = [] + for line in knowledge_lines: + try: + match = re.search(r"id: ['\"](.*?)['\"]", line) + if match: + kp_id = match.group(1).upper() + extracted_ids.append(kp_id) + except AttributeError: + continue + + # 获取对应的知识点名称(确保为字符串) + type_exists = any( + dtype in str(self.knowledge_points.get(kp_id, '')) + for kp_id in extracted_ids + ) + + if not type_exists: + # 查找匹配的知识点(添加空值过滤) + candidates = [ + (k, v) for k, v in self.knowledge_points.items() + if v and dtype in str(v) # 确保v是字符串 + and k.upper() in valid_ids_upper + ] - # 自动补全WITH语句 - if has_question and 'MERGE (q)-[:TESTS_KNOWLEDGE]' in line: - if not any('WITH q' in l for l in safe): - safe.append("WITH q") + # 按名称匹配度排序 + candidates.sort(key=lambda x: ( + dtype in x[1], # 优先完全匹配 + -len(x[1]) # 次优先名称长度短的 + ), reverse=True) - safe.append(line) + if candidates: + target_id, target_name = candidates[0] + print(f"补充知识点: {target_id} - {target_name}") + safe.extend([ + "WITH q", + f"MATCH (kp:KnowledgePoint {{id: \"{target_id.upper()}\"}})", + "MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)" + ]) + else: + print(f"未找到匹配的{dtype}知识点") - # 确保Question节点后紧跟WITH - if has_question: - # 在MERGE (q:Question)之后插入WITH - for i, line in enumerate(safe): - if 'MERGE (q:Question' in line: - if i + 1 >= len(safe) or not safe[i + 1].startswith('WITH'): - safe.insert(i + 1, "WITH q") - break + # === 第五步:语法修正 === + # 确保Question节点后紧跟WITH + if has_question: + for i, line in enumerate(safe): + if 'MERGE (q:Question' in line: + # 检查下一条是否是WITH + if i + 1 >= len(safe) or not safe[i + 1].startswith('WITH'): + safe.insert(i + 1, "WITH q") + break - # 最终过滤空行 - return '\n'.join([line for line in safe if line]) + # 移除重复的WITH语句 + final_safe = [] + prev_was_with = False + for line in safe: + if line.startswith('WITH'): + if not prev_was_with: + final_safe.append(line) + prev_was_with = True + else: + final_safe.append(line) + prev_was_with = False + + return '\n'.join(final_safe) + + except Exception as e: + print(f"清洗Cypher时发生错误: {str(e)}") + return "" def run(self) -> str: """执行知识点关联流程""" @@ -158,6 +241,7 @@ MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)""" if __name__ == '__main__': test_case = """【时间问题】甲乙两车从相距240公里的两地同时出发相向而行,甲车时速60公里,乙车时速40公里,几小时后相遇?""" kg = KnowledgeGraph(test_case) + #print("原始知识点库:", kg.knowledge_points) # 查看加载的知识点 cypher = kg.run() if cypher: # 插入数据 @@ -167,14 +251,17 @@ if __name__ == '__main__': else: print("未生成有效Cypher") - # # 临时诊断 - # print("当前知识库中是否存在该ID:", - # 'f0333b305f7246b5a06d03d4e3ff55a9' in kg.knowledge_points) - # - # # 直接查询数据库 - # test_cypher = ''' - # MATCH (kp:KnowledgePoint) - # WHERE kp.id = 'f0333b305f7246b5a06d03d4e3ff55a9' - # RETURN kp.id, kp.name - # ''' - # print("直接查询结果:", kg.graph.run(test_cypher).data()) \ No newline at end of file + ''' + # 基本可视化查询 + MATCH path=(q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp) + RETURN path + + # 带样式的可视化 + MATCH (q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp) + RETURN q, kp + // 在浏览器中点击左侧样式图标,设置: + // - Question节点颜色:橙色 + // - KnowledgePoint节点颜色:蓝色 + // - 关系线宽:3px + ''' +