'commit'

5 months ago · e0c4779627
parent 623fd11a53
commit e0c4779627
2 changed files with 126 additions and 232 deletions
--- a/AI/Neo4j/N3_InputShiTi.py
+++ b/AI/Neo4j/N3_InputShiTi.py
@@ -5,283 +5,145 @@ from py2neo import Graph
 from openai import OpenAI
 from Config import *

+# 切割试题
+def split_questions(file_path):
+    with open(file_path, 'r', encoding='utf-8') as f:
+        content = f.read()
+
+    # 使用正则表达式匹配题目块（包含答案）
+    pattern = r'(\d+\.\s+【.*?】.*?(?=\n\d+\.|\Z))'
+    questions = re.findall(pattern, content, re.DOTALL)
+
+    # 清洗每个题目块的空白字符
+    cleaned_questions = [q.strip() for q in questions]
+
+    return cleaned_questions[:10]  # 确保只返回前10题

 class KnowledgeGraph:
    def __init__(self, content: str):
        self.content = content
        self.question_id = hashlib.md5(content.encode()).hexdigest()[:8]
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
+
+        # 双数据源加载
        self.knowledge_points = self._get_knowledge_points()
-        self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)
+        self.literacy_points = self._get_literacy_points()
+        print(f"已加载知识点：{len(self.knowledge_points)}个，素养点：{len(self.literacy_points)}个")

-        # self.knowledge_points = self._get_knowledge_points()
-        print("加载知识点数量：", len(self.knowledge_points))  # 添加调试信息
+        self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)

    def _get_knowledge_points(self) -> dict:
-        """保持ID原始大小写"""
        try:
-            # 移除lower()转换
-            return {row['n.id']: row['n.name']  # 直接使用原始ID
+            return {row['n.id']: row['n.name']
                    for row in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}
        except Exception as e:
-            print(f"获取知识点失败：", str(e))
+            print(f"知识点加载失败：{str(e)}")
            return {}

-    def _make_prompt(self) -> str:
-        """生成知识点识别专用提示词"""
-        example_ids = list(self.knowledge_points.keys())[:5]
-        example_names = [self.knowledge_points[k] for k in example_ids]
-
-        return f"""你是一个数学专家，请分析题目考查的知识点，严格：
-1. 只使用以下存在的知识点（格式：ID:名称）：
-{", ".join([f"{k}:{v}" for k, v in zip(example_ids, example_names)])}...
-共{len(self.knowledge_points)}个可用知识点
-2. 题目可能包含多个知识点，让仔细检查。
-3. 按此格式生成Cypher：
-MERGE (q:Question {{id: "{self.question_id}"}})
-SET q.content = "题目内容"
-WITH q
-MATCH (kp:KnowledgePoint {{id: "知识点ID"}})
-MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)"""
-
-    def _clean_cypher(self, code: str) -> str:
-        """完整Cypher清洗逻辑（增强版）"""
-        safe = []
-        content_keywords = {
-            '行程问题': ['相遇', '相向而行', '追及', '速度', '路程'],
-            '几何问题': ['面积', '体积', '周长', '三角形', '长方体'],
-            '分数运算': ['分数', '百分比', '%', '分之']
-        }
-
+    def _get_literacy_points(self) -> dict:
        try:
-            # 提取代码块
-            cypher_block = re.findall(r"```(?:cypher)?\n(.*?)```", code, re.DOTALL)
-            if not cypher_block:
-                print("未检测到Cypher代码块")
-                return ""
-
-            # 预处理配置
-            valid_ids_upper = [k.upper() for k in self.knowledge_points.keys()]
-            detected_types = []
-            raw_lines = cypher_block[0].split('\n')
-            has_question = False
-
-            # === 第一步：基础清洗 ===
-            for line in raw_lines:
-                # 清理注释和空白
-                clean_line = line.split('//')[0].strip()
-                if not clean_line:
-                    continue
-
-                # 阻止CREATE操作
-                if 'CREATE' in clean_line.upper():
-                    print(f"阻止CREATE操作: {clean_line}")
-                    continue
-
-                # 强制Question节点在最前面
-                if 'MERGE (q:Question' in clean_line:
-                    has_question = True
-                    safe.insert(0, clean_line)
-                    continue
-
-                safe.append(clean_line)
-
-            # === 第二步：检测题目类型 ===
-            for pattern, keys in content_keywords.items():
-                if any(k in self.content for k in keys):
-                    detected_types.append(pattern)
-                    print(f"检测到题目类型: {pattern}")
-
-            # === 第三步：处理知识点ID ===
-            knowledge_lines = []
-            for line in safe.copy():
-                if 'MATCH (kp:KnowledgePoint' in line:
-                    # 安全提取ID
-                    match = re.search(r"id: ['\"](.*?)['\"]", line)
-                    if not match:
-                        print(f"无效的MATCH语句: {line}")
-                        safe.remove(line)
-                        continue
+            return {row['n.value']: row['n.title']
+                    for row in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
+        except Exception as e:
+            print(f"素养点加载失败：{str(e)}")
+            return {}

-                    original_id = match.group(1)
-                    upper_id = original_id.upper()
+    def _make_prompt(self) -> str:
+        kp_samples = "\n".join([f"• {k}: {v}" for k, v in list(self.knowledge_points.items())[:3]])
+        lp_samples = "\n".join([f"• {k}: {v}" for k, v in list(self.literacy_points.items())[:3]])

-                    # 验证ID存在性
-                    if upper_id not in valid_ids_upper:
-                        print(f"忽略无效知识点ID: {original_id}")
-                        safe.remove(line)
-                        continue
+        return f"""请分析题目考查的知识点和核心素养：

-                    # 替换为正确的大写ID
-                    new_line = line.replace(original_id, upper_id)
-                    safe[safe.index(line)] = new_line
-                    knowledge_lines.append(new_line)
+可用知识点（ID:名称）：
+{kp_samples}
+...共{len(self.knowledge_points)}个知识点

-                # === 第四步：自动补充知识点 ===
-                for dtype in detected_types:
-                    # 安全获取已关联知识点ID
-                    extracted_ids = []
-                    for line in knowledge_lines:
-                        try:
-                            match = re.search(r"id: ['\"](.*?)['\"]", line)
-                            if match:
-                                kp_id = match.group(1).upper()
-                                extracted_ids.append(kp_id)
-                        except AttributeError:
-                            continue
+可用素养点（ID:名称）：
+{lp_samples}
+...共{len(self.literacy_points)}个素养点

-                    # 获取对应的知识点名称（确保为字符串）
-                    type_exists = any(
-                        dtype in str(self.knowledge_points.get(kp_id, ''))
-                        for kp_id in extracted_ids
-                    )
+生成要求：
+1. 必须使用上述ID
+2. 按以下格式生成Cypher代码：

-                    if not type_exists:
-                        # 查找匹配的知识点（添加空值过滤）
-                        candidates = [
-                            (k, v) for k, v in self.knowledge_points.items()
-                            if v and dtype in str(v)  # 确保v是字符串
-                               and k.upper() in valid_ids_upper
-                        ]
+MERGE (q:Question {{id: "{self.question_id}"}})
+SET q.content = "题目内容"
+WITH q
+MATCH (kp:KnowledgePoint {{id: "知识点ID"}})
+MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)
+WITH q
+MATCH (lp:LiteracyNode {{value: "素养点ID"}})
+MERGE (q)-[:RELATES_TO_LITERACY]->(lp)"""

-                        # 按名称匹配度排序
-                        candidates.sort(key=lambda x: (
-                            dtype in x[1],  # 优先完全匹配
-                            -len(x[1])  # 次优先名称长度短的
-                        ), reverse=True)
+    def _clean_cypher(self, code: str) -> str:
+        valid_kp_ids = [k.upper() for k in self.knowledge_points.keys()]
+        valid_lp_ids = [k.upper() for k in self.literacy_points.keys()]

-                        if candidates:
-                            target_id, target_name = candidates[0]
-                            print(f"补充知识点: {target_id} - {target_name}")
-                            safe.extend([
-                                "WITH q",
-                                f"MATCH (kp:KnowledgePoint {{id: \"{target_id.upper()}\"}})",
-                                "MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)"
-                            ])
-                        else:
-                            print(f"未找到匹配的{dtype}知识点")
+        cleaned = []
+        lines = [line.strip() for line in code.split('\n') if line.strip()]

-            # === 第五步：语法修正 ===
-            # 确保Question节点后紧跟WITH
-            if has_question:
-                for i, line in enumerate(safe):
-                    if 'MERGE (q:Question' in line:
-                        # 检查下一条是否是WITH
-                        if i + 1 >= len(safe) or not safe[i + 1].startswith('WITH'):
-                            safe.insert(i + 1, "WITH q")
-                        break
+        for line in lines:
+            # 处理知识点匹配
+            if 'MATCH (kp:KnowledgePoint' in line:
+                if match := re.search(r'id: ["\'](.*?)["\']', line):
+                    kp_id = match.group(1).upper()
+                    if kp_id in valid_kp_ids:
+                        cleaned.append(line.replace(match.group(1), kp_id))

-            # 移除重复的WITH语句
-            final_safe = []
-            prev_was_with = False
-            for line in safe:
-                if line.startswith('WITH'):
-                    if not prev_was_with:
-                        final_safe.append(line)
-                    prev_was_with = True
-                else:
-                    final_safe.append(line)
-                    prev_was_with = False
+            # 处理素养点匹配
+            elif 'MATCH (lp:LiteracyNode' in line:
+                if match := re.search(r'value: ["\'](.*?)["\']', line):
+                    lp_id = match.group(1).upper()
+                    if lp_id in valid_lp_ids:
+                        cleaned.append(line.replace(match.group(1), lp_id))

-            return '\n'.join(final_safe)
+            # 保留其他合法语句
+            elif line.startswith(('MERGE', 'WITH', 'SET')):
+                cleaned.append(line)

-        except Exception as e:
-            print(f"清洗Cypher时发生错误: {str(e)}")
-            return ""
+        return '\n'.join(cleaned)

    def run(self) -> str:
-        """执行知识点关联流程"""
        try:
            response = self.client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
-                    {
-                        "role": "system",
-                        "content": self._make_prompt()
-                    },
-                    {
-                        "role": "user",
-                        "content": f"题目内容：{self.content}\n请分析考查的知识点，只返回Cypher代码"
-                    }
+                    {"role": "system", "content": self._make_prompt()},
+                    {"role": "user", "content": f"题目内容：{self.content}"}
                ]
            )
-
-            raw_cypher = response.choices[0].message.content
-            cleaned_cypher = self._clean_cypher(raw_cypher)
-
-            if cleaned_cypher:
-                print("验证通过的Cypher：\n", cleaned_cypher)
-                return cleaned_cypher
-            return ""
-
+            return self._clean_cypher(response.choices[0].message.content)
        except Exception as e:
-            print("知识点分析失败：", str(e))
+            print(f"分析失败：{str(e)}")
            return ""

-    def query_related_knowledge(self):
-        """查询题目关联的知识点"""
+    def query_relations(self):
        cypher = f"""
-        MATCH (q:Question {{id: "{self.question_id}"}})-[:TESTS_KNOWLEDGE]->(kp)
-        RETURN kp.id AS knowledge_id, kp.name AS knowledge_name
-        """
-        try:
-            result = self.graph.run(cypher).data()
-            if result:
-                print(f"题目关联的知识点（{self.question_id}）：")
-                for row in result:
-                    print(f"- {row['knowledge_name']} (ID: {row['knowledge_id']})")
-            else:
-                print("该题目尚未关联知识点")
-            return result
-        except Exception as e:
-            print("查询失败：", str(e))
-            return []
-
-
-# 切割试题
-def split_questions(file_path):
-    with open(file_path, 'r', encoding='utf-8') as f:
-        content = f.read()
-
-    # 使用正则表达式匹配题目块（包含答案）
-    pattern = r'(\d+\.\s+【.*?】.*?(?=\n\d+\.|\Z))'
-    questions = re.findall(pattern, content, re.DOTALL)
-
-    # 清洗每个题目块的空白字符
-    cleaned_questions = [q.strip() for q in questions]
-
-    return cleaned_questions[:10]  # 确保只返回前10题
-
-
-# 测试用例
+        MATCH (q:Question {{id: "{self.question_id}"}})
+        OPTIONAL MATCH (q)-[:TESTS_KNOWLEDGE]->(kp)
+        OPTIONAL MATCH (q)-[:RELATES_TO_LITERACY]->(lp)
+        RETURN 
+            kp.id AS knowledge_id,
+            kp.name AS knowledge_name,
+            lp.value AS literacy_id,
+            lp.title AS literacy_title"""
+        return self.graph.run(cypher).data()
+
+
+# 使用示例
 if __name__ == '__main__':
-    # 分段读入题目
-    question_blocks = split_questions('Backup/ShiTi.md')
+    question_blocks = split_questions('ShiTi.md')

    # 验证分割结果
    for i, block in enumerate(question_blocks, 1):
        print(f"第{i}题块：")
        print("-" * 50)
        kg = KnowledgeGraph(block)
-        cypher = kg.run()
-        if cypher:
-            # 插入数据
-            kg.graph.run(cypher)
-            print("执行成功！关联知识点：")
-            kg.query_related_knowledge()  # 新增查询
-        else:
-            print("未生成有效Cypher")

-    '''
-    # 基本可视化查询
-    MATCH path=(q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp)
-    RETURN path
-    
-    # 带样式的可视化
-    MATCH (q:Question {id: "07ece550"})-[:TESTS_KNOWLEDGE]->(kp)
-    RETURN q, kp
-    // 在浏览器中点击左侧样式图标，设置：
-    // - Question节点颜色：橙色
-    // - KnowledgePoint节点颜色：蓝色
-    // - 关系线宽：3px
-    '''
+        if cypher := kg.run():
+            print("生成的Cypher:\n", cypher)
+            kg.graph.run(cypher)
+            print("关联结果：")
+            for record in kg.query_relations():
+                print(f"知识点：{record['knowledge_name']} ({record['knowledge_id']})")
+                print(f"素养点：{record['literacy_title']} ({record['literacy_id']})")
--- a/AI/Neo4j/ShiTi.md
+++ b/AI/Neo4j/ShiTi.md
@@ -0,0 +1,32 @@
+### 三年级
+1. 【购物计算】小明用50元买了3本笔记本，每本8元，还剩多少钱？
+   - 答案：50 - 3×8 = 26元
+
+2. 【乘法应用】学校食堂每天消耗15袋大米，一周（5天）需要准备多少袋？
+   - 答案：15×5 = 75袋
+
+3. 【周长计算】一个正方形花坛边长3米，要在四周围栅栏，需要多长的栅栏？
+   - 答案：3×4 = 12米
+
+### 四年级
+4. 【四则运算】图书馆原有图书1250本，上午借出368本，下午归还195本，现有多少本？
+   - 答案：1250 - 368 + 195 = 1077本
+
+5. 【面积问题】长方形果园长25米，宽比长短7米，这个果园的面积是多少？
+   - 答案：25×(25-7) = 450平方米
+
+6. 【时间问题】甲乙两车从相距240公里的两地同时出发相向而行，甲车时速60公里，乙车时速40公里，几小时后相遇？
+   - 答案：240÷(60+40) = 2.4小时
+
+### 五年级
+7. 【分数运算】果汁店第一天卖出3/4吨橙汁，第二天卖出1/2吨苹果汁，两天共卖出多少吨？
+   - 答案：3/4 + 1/2 = 5/4吨
+
+8. 【小数应用】文具店钢笔单价12.5元，买4支送1支，买20支实际要付多少钱？
+   - 答案：(20÷5)×4×12.5 = 200元
+
+9. 【体积计算】长方体游泳池长25米，宽12米，深1.8米，这个游泳池最多能装多少立方米水？
+   - 答案：25×12×1.8 = 540立方米
+
+10. 【比例问题】妈妈给小明和妹妹36元零花钱，按5:4分配，小明能得多少元？
+    - 答案：36÷(5+4)×5 = 20元