# QingLong/AI/Neo4j/N3_InputShiTi.py

# -*- coding: utf-8 -*-
import re
import hashlib
from py2neo import Graph
from openai import OpenAI
from Config import *

# 切割试题
def split_questions(file_path, limit: "int | None" = 10):
    """Split an exam file into individual question blocks.

    A block starts at a header of the form ``<number>. 【...】`` and extends up
    to (but not including) the next line that begins with ``<number>.``, or to
    the end of the file. Text that follows the question inside that span
    (such as the answer) therefore stays in the same block.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.
        limit: Maximum number of blocks to return. Defaults to 10, which is
            the cap this function has always applied. Pass ``None`` to return
            every block found in the file.

    Returns:
        The whitespace-stripped question blocks, in file order.

    Raises:
        ValueError: If ``limit`` is negative (a negative slice bound would
            silently drop blocks from the end instead of capping the count).
        OSError: If the file cannot be opened or read.
    """
    if limit is not None and limit < 0:
        raise ValueError(f"limit must be non-negative or None, got {limit!r}")

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # DOTALL lets ``.*?`` run across line breaks so a block can span many
    # lines; the lookahead stops it right before the next numbered line.
    pattern = r'(\d+\.\s+【.*?】.*?(?=\n\d+\.|\Z))'
    questions = re.findall(pattern, content, re.DOTALL)

    # Trim the leading/trailing whitespace of every block.
    cleaned_questions = [q.strip() for q in questions]

    if limit is None:
        return cleaned_questions
    return cleaned_questions[:limit]

class KnowledgeGraph:
    """Link one exam question to knowledge points and literacy points in Neo4j.

    Constructing an instance connects to Neo4j, loads the ID/name pairs of all
    ``KnowledgePoint`` and ``LiteracyNode`` nodes and creates the chat client.
    ``run`` asks the model for a Cypher statement and returns a sanitised
    version of it; executing that statement is left to the caller.
    """

    # One rule per kind of MATCH line the model is asked to generate:
    # (marker identifying the line, attribute holding the valid IDs,
    #  pattern capturing the quoted ID, pattern spotting a bare reference to
    #  the variable that the MATCH binds).
    _MATCH_RULES = (
        ('MATCH (kp:KnowledgePoint', 'knowledge_points',
         re.compile(r'id: ["\'](.*?)["\']'), re.compile(r'\(\s*kp\s*\)')),
        ('MATCH (lp:LiteracyNode', 'literacy_points',
         re.compile(r'value: ["\'](.*?)["\']'), re.compile(r'\(\s*lp\s*\)')),
    )

    # A line made up of WITH followed only by words, commas or ``*``.
    _BARE_WITH = re.compile(r'WITH\b[\w\s,*]*')

    def __init__(self, content: str):
        """Connect to Neo4j, load the reference data and create the chat client.

        Args:
            content: Full text of the question block to analyse.
        """
        self.content = content
        # Short, deterministic ID: first 8 hex digits of the content's MD5.
        self.question_id = hashlib.md5(content.encode()).hexdigest()[:8]
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)

        # Load both reference data sets.
        self.knowledge_points = self._get_knowledge_points()
        self.literacy_points = self._get_literacy_points()
        print(f"已加载知识点：{len(self.knowledge_points)}个，素养点：{len(self.literacy_points)}个")

        self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)

    def _get_knowledge_points(self) -> dict:
        """Return ``{id: name}`` for every ``KnowledgePoint`` node.

        Any failure is printed and an empty dict is returned instead.
        """
        try:
            return {row['n.id']: row['n.name']
                    for row in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}
        except Exception as e:
            print(f"知识点加载失败：{str(e)}")
            return {}

    def _get_literacy_points(self) -> dict:
        """Return ``{value: title}`` for every ``LiteracyNode`` node.

        Any failure is printed and an empty dict is returned instead.
        """
        try:
            return {row['n.value']: row['n.title']
                    for row in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
        except Exception as e:
            print(f"素养点加载失败：{str(e)}")
            return {}

    def _make_prompt(self) -> str:
        """Build the system prompt describing the available IDs and the Cypher format.

        NOTE(review): only the first three entries of each kind are listed,
        while rule 1 of the prompt requires the model to use the listed IDs;
        confirm that this truncation is intended.
        """
        kp_samples = "\n".join([f"• {k}: {v}" for k, v in list(self.knowledge_points.items())[:3]])
        lp_samples = "\n".join([f"• {k}: {v}" for k, v in list(self.literacy_points.items())[:3]])

        return f"""请分析题目考查的知识点和核心素养：

可用知识点（ID:名称）：
{kp_samples}
...共{len(self.knowledge_points)}个知识点

可用素养点（ID:名称）：
{lp_samples}
...共{len(self.literacy_points)}个素养点

生成要求：
1. 必须使用上述ID
2. 按以下格式生成Cypher代码：

MERGE (q:Question {{id: "{self.question_id}"}})
SET q.content = "题目内容"
WITH q
MATCH (kp:KnowledgePoint {{id: "知识点ID"}})
MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)
WITH q
MATCH (lp:LiteracyNode {{value: "素养点ID"}})
MERGE (q)-[:RELATES_TO_LITERACY]->(lp)"""

    @staticmethod
    def _canonicalize_match(line: str, id_pattern, points: dict, folded: dict):
        """Return ``line`` with its quoted ID rewritten to the stored spelling.

        Args:
            line: A generated MATCH line.
            id_pattern: Compiled pattern whose group 1 captures the quoted ID.
            points: Valid IDs (dict keys) exactly as loaded from the graph.
            folded: Upper-cased ID -> stored ID, for case-insensitive lookup.

        Returns:
            The corrected line, or ``None`` when the line carries no quoted ID
            or the ID is unknown.
        """
        found = id_pattern.search(line)
        if found is None:
            return None
        raw_id = found.group(1)
        # An exact hit wins; otherwise fall back to a case-insensitive one.
        canonical = raw_id if raw_id in points else folded.get(raw_id.upper())
        if canonical is None:
            return None
        # Replace only the captured span so that other occurrences of the
        # same text in the line (e.g. the variable name) stay untouched.
        return line[:found.start(1)] + canonical + line[found.end(1):]

    def _clean_cypher(self, code: str) -> str:
        """Filter model output down to Cypher lines that are safe to keep.

        * A ``KnowledgePoint`` / ``LiteracyNode`` MATCH line is kept only when
          its ID is known (compared case-insensitively); the ID is rewritten
          to the spelling loaded from the graph so the MATCH can succeed.
        * After such a MATCH line has been rejected, MERGE lines that refer
          to its variable (``(kp)`` / ``(lp)``) are dropped until a valid
          MATCH binds the variable again; merging on an unbound variable
          would create a new, unlabelled node.
        * Other lines are kept only if they start with MERGE, WITH or SET.
        * Trailing bare WITH lines are removed because a Cypher statement
          cannot end with WITH.

        Args:
            code: Raw text returned by the model.

        Returns:
            The retained lines joined with newlines; may be empty.
        """
        # Non-string keys are skipped: a quoted ID can never equal them.
        folded_ids = {
            attr: {key.upper(): key
                   for key in getattr(self, attr) if isinstance(key, str)}
            for _, attr, _, _ in self._MATCH_RULES
        }

        cleaned = []
        unbound_refs = set()  # reference patterns of currently rejected variables

        for raw_line in code.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            rule = next((r for r in self._MATCH_RULES if r[0] in line), None)
            if rule is not None:
                _, attr, id_pattern, ref_pattern = rule
                fixed = self._canonicalize_match(
                    line, id_pattern, getattr(self, attr), folded_ids[attr])
                if fixed is None:
                    unbound_refs.add(ref_pattern)
                else:
                    unbound_refs.discard(ref_pattern)
                    cleaned.append(fixed)
                continue

            # Keep the remaining whitelisted statements.
            if not line.startswith(('MERGE', 'WITH', 'SET')):
                continue
            if line.startswith('MERGE') and any(p.search(line) for p in unbound_refs):
                continue
            cleaned.append(line)

        while cleaned and self._BARE_WITH.fullmatch(cleaned[-1]):
            cleaned.pop()

        return '\n'.join(cleaned)

    def run(self) -> str:
        """Ask the model to analyse the question and return sanitised Cypher.

        Returns:
            The cleaned Cypher text, or ``""`` when the request or the
            clean-up fails (the error is printed).
        """
        try:
            response = self.client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": self._make_prompt()},
                    {"role": "user", "content": f"题目内容：{self.content}"}
                ]
            )
            return self._clean_cypher(response.choices[0].message.content)
        except Exception as e:
            print(f"分析失败：{str(e)}")
            return ""

    def query_relations(self):
        """Return the knowledge/literacy nodes linked to this question.

        Each row is a dict with the keys ``knowledge_id``, ``knowledge_name``,
        ``literacy_id`` and ``literacy_title``. Both relationship matches are
        OPTIONAL, so a value is ``None`` where nothing is linked.
        """
        cypher = f"""
        MATCH (q:Question {{id: "{self.question_id}"}})
        OPTIONAL MATCH (q)-[:TESTS_KNOWLEDGE]->(kp)
        OPTIONAL MATCH (q)-[:RELATES_TO_LITERACY]->(lp)
        RETURN
            kp.id AS knowledge_id,
            kp.name AS knowledge_name,
            lp.value AS literacy_id,
            lp.title AS literacy_title"""
        return self.graph.run(cypher).data()


# 使用示例
if __name__ == '__main__':
    # Walk through every question block extracted from the exam file.
    for number, question_text in enumerate(split_questions('ShiTi.md'), start=1):
        print(f"第{number}题块：")
        print("-" * 50)

        analyzer = KnowledgeGraph(question_text)
        statement = analyzer.run()
        if not statement:
            # Nothing usable came back for this question; move on.
            continue

        print("生成的Cypher:\n", statement)
        # Write the relations, then read them back for display.
        analyzer.graph.run(statement)

        print("关联结果：")
        for row in analyzer.query_relations():
            print(f"知识点：{row['knowledge_name']} ({row['knowledge_id']})")
            print(f"素养点：{row['literacy_title']} ({row['literacy_id']})")