"""Link exam questions to knowledge points and literacy points in Neo4j.

Workflow: split a Markdown question file into question blocks, ask a chat
model to emit Cypher that attaches each question to existing
``KnowledgePoint`` / ``LiteracyNode`` nodes, sanitise that Cypher against the
IDs actually present in the graph, then execute it.
"""
import hashlib
import re

# One numbered question block: "<n>. 【tag】 ..." up to (not including) the next
# numbered line, the next Markdown heading, or end of input.
_QUESTION_PATTERN = re.compile(
    r'(\d+\.\s+【.*?】.*?(?=\n\d+\.|\n#{1,6}\s|\Z))', re.DOTALL
)
# Quoted ID inside a node property map, e.g. {id: "KP001"} / {value: "L1"}.
_KP_ID_PATTERN = re.compile(r'id: ["\'](.*?)["\']')
_LP_ID_PATTERN = re.compile(r'value: ["\'](.*?)["\']')
# Relationship whose target is one of the node variables bound by MATCH.
_REL_TARGET_PATTERN = re.compile(r'->\s*\(\s*(kp|lp)\s*\)')
# (label, name column, id column) for printing query_relations() rows.
_RESULT_FIELDS = (
    ("知识点", "knowledge_name", "knowledge_id"),
    ("素养点", "literacy_title", "literacy_id"),
)


def split_questions(file_path, limit=10):
    """Split a Markdown question file into individual question blocks.

    A block starts at a line of the form ``N. 【tag】 question`` and includes
    the indented answer line(s) that follow, for example::

        1. 【购物计算】小明用50元买了3本笔记本,每本8元,还剩多少钱?
           - 答案:50 - 3×8 = 26元

    Markdown headings between questions (``### 四年级``) terminate the
    preceding block instead of being appended to it.

    Args:
        file_path: Path of the UTF-8 encoded question file.
        limit: Maximum number of blocks to return; ``None`` returns all.
            Defaults to 10.

    Returns:
        A list of whitespace-stripped question blocks in file order.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    questions = [q.strip() for q in _QUESTION_PATTERN.findall(content)]
    return questions[:limit]


def _canonical_ids(points) -> dict:
    """Map the upper-cased form of every stored ID to the ID as stored."""
    return {str(k).upper(): str(k) for k in points if k is not None}


def _rewrite_id(line: str, pattern, lookup: dict):
    """Return *line* with its quoted ID replaced by the stored spelling.

    Only the matched ID span is rewritten, so an ID that happens to equal
    other text in the statement (such as the ``kp`` variable name) cannot
    corrupt it. Returns ``None`` when the line has no ID or the ID is not in
    *lookup*.
    """
    match = pattern.search(line)
    if match is None:
        return None
    canonical = lookup.get(match.group(1).upper())
    if canonical is None:
        return None
    return line[:match.start(1)] + canonical + line[match.end(1):]


class KnowledgeGraph:
    """Associates one question with graph nodes via model-generated Cypher."""

    def __init__(self, content: str):
        """Connect to Neo4j and the chat model and preload both ID catalogues.

        Args:
            content: Full text of the question (and its answer).
        """
        # Imported here rather than at module level so that the pure helpers
        # (question splitting, Cypher cleaning) stay importable without the
        # database / model client packages installed.
        from openai import OpenAI
        from py2neo import Graph

        from Config import (
            MODEL_API_KEY,
            MODEL_API_URL,
            MODEL_NAME,
            NEO4J_AUTH,
            NEO4J_URI,
        )

        self.content = content
        # Short, stable identifier derived from the question text.
        self.question_id = hashlib.md5(content.encode()).hexdigest()[:8]
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)

        # Two catalogues: knowledge points and literacy points.
        self.knowledge_points = self._get_knowledge_points()
        self.literacy_points = self._get_literacy_points()
        print(f"已加载知识点:{len(self.knowledge_points)}个,素养点:{len(self.literacy_points)}个")

        self.model_name = MODEL_NAME
        self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)

    def _get_knowledge_points(self) -> dict:
        """Return ``{id: name}`` for every KnowledgePoint node ({} on failure)."""
        try:
            return {row['n.id']: row['n.name']
                    for row in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}
        except Exception as e:  # best effort: report and continue with no points
            print(f"知识点加载失败:{e}")
            return {}

    def _get_literacy_points(self) -> dict:
        """Return ``{value: title}`` for every LiteracyNode node ({} on failure)."""
        try:
            return {row['n.value']: row['n.title']
                    for row in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
        except Exception as e:  # best effort: report and continue with no points
            print(f"素养点加载失败:{e}")
            return {}

    def _make_prompt(self) -> str:
        """Build the system prompt describing available IDs and output format."""
        # NOTE(review): only the first three entries of each catalogue are
        # listed while the prompt demands IDs from the list -- confirm that
        # this sample size is intended.
        kp_samples = "\n".join(
            f"• {k}: {v}" for k, v in list(self.knowledge_points.items())[:3]
        )
        lp_samples = "\n".join(
            f"• {k}: {v}" for k, v in list(self.literacy_points.items())[:3]
        )

        return f"""请分析题目考查的知识点和核心素养:

可用知识点(ID:名称):
{kp_samples}
...共{len(self.knowledge_points)}个知识点

可用素养点(ID:名称):
{lp_samples}
...共{len(self.literacy_points)}个素养点

生成要求:
1. 必须使用上述ID
2. 按以下格式生成Cypher代码:

MERGE (q:Question {{id: "{self.question_id}"}})
SET q.content = "题目内容"
WITH q
MATCH (kp:KnowledgePoint {{id: "知识点ID"}})
MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)
WITH q
MATCH (lp:LiteracyNode {{value: "素养点ID"}})
MERGE (q)-[:RELATES_TO_LITERACY]->(lp)"""

    def _clean_cypher(self, code: str) -> str:
        """Keep only the safe, valid statements of model-generated Cypher.

        * ``MATCH`` lines on KnowledgePoint / LiteracyNode survive only when
          their ID exists in the loaded catalogue (compared case-insensitively)
          and are rewritten to the ID exactly as stored in the graph.
        * A relationship ``MERGE`` targeting ``kp`` / ``lp`` is dropped when
          that variable is not currently bound by an accepted ``MATCH``;
          otherwise Cypher would create and link a blank node.
        * Other ``MERGE`` / ``SET`` / ``WITH`` lines are kept; everything else
          (prose, code fences, ``CREATE`` ...) is discarded.
        * Consecutive, leading and trailing ``WITH`` lines left over from
          dropped statements are removed so the query stays syntactically valid.

        Args:
            code: Raw model output, one Cypher statement per line.

        Returns:
            The cleaned statements joined by newlines (possibly empty).
        """
        kp_lookup = _canonical_ids(self.knowledge_points)
        lp_lookup = _canonical_ids(self.literacy_points)

        cleaned = []
        bound = set()  # node variables bound by accepted MATCH lines in scope

        for raw_line in code.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            if 'MATCH (kp:KnowledgePoint' in line or 'MATCH (lp:LiteracyNode' in line:
                if 'MATCH (kp:KnowledgePoint' in line:
                    var, pattern, lookup = 'kp', _KP_ID_PATTERN, kp_lookup
                else:
                    var, pattern, lookup = 'lp', _LP_ID_PATTERN, lp_lookup
                rewritten = _rewrite_id(line, pattern, lookup)
                if rewritten is None:
                    bound.discard(var)
                else:
                    bound.add(var)
                    cleaned.append(rewritten)

            elif line.startswith('WITH'):
                # WITH narrows the scope to the variables it lists.
                bound &= set(re.findall(r'\w+', line[4:]))
                if cleaned and not cleaned[-1].startswith('WITH'):
                    cleaned.append(line)

            elif line.startswith(('MERGE', 'SET')):
                target = _REL_TARGET_PATTERN.search(line)
                if target is None or target.group(1) in bound:
                    cleaned.append(line)

        # A query may not end with WITH.
        while cleaned and cleaned[-1].startswith('WITH'):
            cleaned.pop()

        return '\n'.join(cleaned)

    def run(self) -> str:
        """Ask the model for Cypher and return the cleaned result.

        Returns:
            Cleaned Cypher, or ``""`` when the request fails or nothing valid
            was produced.
        """
        try:
            response = self.client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {"role": "system", "content": self._make_prompt()},
                    {"role": "user", "content": f"题目内容:{self.content}"}
                ]
            )
            # The message content can be None; treat that as "no Cypher".
            return self._clean_cypher(response.choices[0].message.content or "")
        except Exception as e:  # top-level boundary: report and signal failure
            print(f"分析失败:{e}")
            return ""

    def query_relations(self):
        """Return the knowledge / literacy nodes linked to this question.

        Returns:
            A list of dicts with keys ``knowledge_id``, ``knowledge_name``,
            ``literacy_id`` and ``literacy_title``. Because both relations are
            optional matches, rows are the cross product of the two sets and
            either half may be ``None``.
        """
        cypher = """
        MATCH (q:Question {id: $question_id})
        OPTIONAL MATCH (q)-[:TESTS_KNOWLEDGE]->(kp)
        OPTIONAL MATCH (q)-[:RELATES_TO_LITERACY]->(lp)
        RETURN
            kp.id AS knowledge_id,
            kp.name AS knowledge_name,
            lp.value AS literacy_id,
            lp.title AS literacy_title"""
        return self.graph.run(cypher, question_id=self.question_id).data()


def main():
    """Process every question in ``ShiTi.md`` and print what was linked."""
    question_blocks = split_questions('ShiTi.md')

    for i, block in enumerate(question_blocks, 1):
        print(f"第{i}题块:")
        print("-" * 50)
        kg = KnowledgeGraph(block)
        if cypher := kg.run():
            print("生成的Cypher:\n", cypher)
            kg.graph.run(cypher)
            print("关联结果:")
            # Rows are a cross product; print each linked node only once and
            # skip the None placeholders of unmatched optional relations.
            seen = set()
            for record in kg.query_relations():
                for label, name_key, id_key in _RESULT_FIELDS:
                    entry = (label, record[name_key], record[id_key])
                    if record[id_key] is None or entry in seen:
                        continue
                    seen.add(entry)
                    print(f"{label}:{record[name_key]} ({record[id_key]})")


# Usage example
if __name__ == '__main__':
    main()