'commit'

5 months ago · e898ee003e
parent b05f7f18d7
commit e898ee003e
3 changed files with 149 additions and 62 deletions
--- a/AI/Neo4j/Backup/K3_Start.py
+++ b/AI/Neo4j/Backup/K3_Start.py
@@ -36,7 +36,7 @@ if __name__ == '__main__':
    executor.execute_cypher_text(init_script)

    # 分段读入题目
-    question_blocks = split_questions('../ShiTi.md')
+    question_blocks = split_questions('ShiTi.md')

    # 验证分割结果
    for i, block in enumerate(question_blocks, 1):
--- a/AI/Neo4j/Backup/ShiTi.md
+++ b/AI/Neo4j/Backup/ShiTi.md
--- a/AI/Neo4j/InputShiTi.py
+++ b/AI/Neo4j/InputShiTi.py
@@ -36,8 +36,8 @@ class KnowledgeGraph:
 1. 只使用以下存在的知识点（格式：ID:名称）：
 {", ".join([f"{k}:{v}" for k, v in zip(example_ids, example_names)])}...
 共{len(self.knowledge_points)}个可用知识点
-
-2. 按此格式生成Cypher：
+2. 题目可能包含多个知识点，让仔细检查。
+3. 按此格式生成Cypher：
 MERGE (q:Question {{id: "{self.question_id}"}})
 SET q.content = "题目内容"
 WITH q
@@ -45,66 +45,149 @@ MATCH (kp:KnowledgePoint {{id: "知识点ID"}})
 MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)"""

    def _clean_cypher(self, code: str) -> str:
-        """完整清洗逻辑"""
+        """完整Cypher清洗逻辑（增强版）"""
        safe = []
-        cypher_block = re.findall(r"```(?:cypher)?\n(.*?)```", code, re.DOTALL)
-        if not cypher_block:
-            return ""
+        content_keywords = {
+            '行程问题': ['相遇', '相向而行', '追及', '速度', '路程'],
+            '几何问题': ['面积', '体积', '周长', '三角形', '长方体'],
+            '分数运算': ['分数', '百分比', '%', '分之']
+        }
+
+        try:
+            # 提取代码块
+            cypher_block = re.findall(r"```(?:cypher)?\n(.*?)```", code, re.DOTALL)
+            if not cypher_block:
+                print("未检测到Cypher代码块")
+                return ""
+
+            # 预处理配置
+            valid_ids_upper = [k.upper() for k in self.knowledge_points.keys()]
+            detected_types = []
+            raw_lines = cypher_block[0].split('\n')
+            has_question = False
+
+            # === 第一步：基础清洗 ===
+            for line in raw_lines:
+                # 清理注释和空白
+                clean_line = line.split('//')[0].strip()
+                if not clean_line:
+                    continue
+
+                # 阻止CREATE操作
+                if 'CREATE' in clean_line.upper():
+                    print(f"阻止CREATE操作: {clean_line}")
+                    continue
+
+                # 强制Question节点在最前面
+                if 'MERGE (q:Question' in clean_line:
+                    has_question = True
+                    safe.insert(0, clean_line)
+                    continue
+
+                safe.append(clean_line)
+
+            # === 第二步：检测题目类型 ===
+            for pattern, keys in content_keywords.items():
+                if any(k in self.content for k in keys):
+                    detected_types.append(pattern)
+                    print(f"检测到题目类型: {pattern}")

-        # 预处理：获取所有知识点的规范大写形式
-        valid_ids_upper = [k.upper() for k in self.knowledge_points.keys()]
-
-        has_question = False
-        for line in cypher_block[0].split('\n'):
-            # 清理注释和空白
-            line = line.split('//')[0].strip()
-            if not line:
-                continue
-
-            # 阻止CREATE操作
-            if 'CREATE' in line.upper():
-                continue
-
-            # 强制Question节点在最前面
-            if 'MERGE (q:Question' in line:
-                has_question = True
-                safe.insert(0, line)
-                continue
-
-            # 处理知识点匹配
-            if 'MATCH (kp:KnowledgePoint' in line:
-                # 提取并验证ID
-                kp_id_match = re.search(r"id: ['\"](.*?)['\"]", line)
-                if kp_id_match:
-                    original_id = kp_id_match.group(1)
+            # === 第三步：处理知识点ID ===
+            knowledge_lines = []
+            for line in safe.copy():
+                if 'MATCH (kp:KnowledgePoint' in line:
+                    # 安全提取ID
+                    match = re.search(r"id: ['\"](.*?)['\"]", line)
+                    if not match:
+                        print(f"无效的MATCH语句: {line}")
+                        safe.remove(line)
+                        continue
+
+                    original_id = match.group(1)
                    upper_id = original_id.upper()

-                    # 验证存在性（不区分大小写）
+                    # 验证ID存在性
                    if upper_id not in valid_ids_upper:
                        print(f"忽略无效知识点ID: {original_id}")
+                        safe.remove(line)
                        continue

-                    # 替换为数据库实际存储的大写ID
-                    line = line.replace(original_id, upper_id)
+                    # 替换为正确的大写ID
+                    new_line = line.replace(original_id, upper_id)
+                    safe[safe.index(line)] = new_line
+                    knowledge_lines.append(new_line)
+
+                # === 第四步：自动补充知识点 ===
+                for dtype in detected_types:
+                    # 安全获取已关联知识点ID
+                    extracted_ids = []
+                    for line in knowledge_lines:
+                        try:
+                            match = re.search(r"id: ['\"](.*?)['\"]", line)
+                            if match:
+                                kp_id = match.group(1).upper()
+                                extracted_ids.append(kp_id)
+                        except AttributeError:
+                            continue
+
+                    # 获取对应的知识点名称（确保为字符串）
+                    type_exists = any(
+                        dtype in str(self.knowledge_points.get(kp_id, ''))
+                        for kp_id in extracted_ids
+                    )
+
+                    if not type_exists:
+                        # 查找匹配的知识点（添加空值过滤）
+                        candidates = [
+                            (k, v) for k, v in self.knowledge_points.items()
+                            if v and dtype in str(v)  # 确保v是字符串
+                               and k.upper() in valid_ids_upper
+                        ]

-            # 自动补全WITH语句
-            if has_question and 'MERGE (q)-[:TESTS_KNOWLEDGE]' in line:
-                if not any('WITH q' in l for l in safe):
-                    safe.append("WITH q")
+                        # 按名称匹配度排序
+                        candidates.sort(key=lambda x: (
+                            dtype in x[1],  # 优先完全匹配
+                            -len(x[1])  # 次优先名称长度短的
+                        ), reverse=True)

-            safe.append(line)
+                        if candidates:
+                            target_id, target_name = candidates[0]
+                            print(f"补充知识点: {target_id} - {target_name}")
+                            safe.extend([
+                                "WITH q",
+                                f"MATCH (kp:KnowledgePoint {{id: \"{target_id.upper()}\"}})",
+                                "MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)"
+                            ])
+                        else:
+                            print(f"未找到匹配的{dtype}知识点")

-        # 确保Question节点后紧跟WITH
-        if has_question:
-            # 在MERGE (q:Question)之后插入WITH
-            for i, line in enumerate(safe):
-                if 'MERGE (q:Question' in line:
-                    if i + 1 >= len(safe) or not safe[i + 1].startswith('WITH'):
-                        safe.insert(i + 1, "WITH q")
-                    break
+            # === 第五步：语法修正 ===
+            # 确保Question节点后紧跟WITH
+            if has_question:
+                for i, line in enumerate(safe):
+                    if 'MERGE (q:Question' in line:
+                        # 检查下一条是否是WITH
+                        if i + 1 >= len(safe) or not safe[i + 1].startswith('WITH'):
+                            safe.insert(i + 1, "WITH q")
+                        break

-        # 最终过滤空行
-        return '\n'.join([line for line in safe if line])
+            # 移除重复的WITH语句
+            final_safe = []
+            prev_was_with = False
+            for line in safe:
+                if line.startswith('WITH'):
+                    if not prev_was_with:
+                        final_safe.append(line)
+                    prev_was_with = True
+                else:
+                    final_safe.append(line)
+                    prev_was_with = False
+
+            return '\n'.join(final_safe)
+
+        except Exception as e:
+            print(f"清洗Cypher时发生错误: {str(e)}")
+            return ""

    def run(self) -> str:
        """执行知识点关联流程"""
@@ -158,6 +241,7 @@ MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)"""
 if __name__ == '__main__':
    test_case = """【时间问题】甲乙两车从相距240公里的两地同时出发相向而行，甲车时速60公里，乙车时速40公里，几小时后相遇？"""
    kg = KnowledgeGraph(test_case)
+    #print("原始知识点库：", kg.knowledge_points)  # 查看加载的知识点
    cypher = kg.run()
    if cypher:
        # 插入数据
@@ -167,14 +251,17 @@ if __name__ == '__main__':
    else:
        print("未生成有效Cypher")

-    # # 临时诊断
-    # print("当前知识库中是否存在该ID：",
-    #       'f0333b305f7246b5a06d03d4e3ff55a9' in kg.knowledge_points)
-    #
-    # # 直接查询数据库
-    # test_cypher = '''
-    # MATCH (kp:KnowledgePoint)
-    # WHERE kp.id = 'f0333b305f7246b5a06d03d4e3ff55a9'
-    # RETURN kp.id, kp.name
-    # '''
-    # print("直接查询结果：", kg.graph.run(test_cypher).data())
+    '''
+    # 基本可视化查询
+    MATCH path=(q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp)
+    RETURN path
+    
+    # 带样式的可视化
+    MATCH (q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp)
+    RETURN q, kp
+    // 在浏览器中点击左侧样式图标，设置：
+    // - Question节点颜色：橙色
+    // - KnowledgePoint节点颜色：蓝色
+    // - 关系线宽：3px
+    '''
+