You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

150 lines
5.4 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
import re
import hashlib
from py2neo import Graph
from openai import OpenAI
from Config import *
# Split the exam paper into individual question blocks
def split_questions(file_path, limit=10):
    """Read an exam file and return its question blocks as cleaned strings.

    A block starts with ``<digits>.``, whitespace and a ``【...】`` tag, and
    runs up to (but not including) the next line that begins with
    ``<digits>.`` or the end of the file, so any answer text that follows a
    question stays attached to it.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.
        limit: Maximum number of blocks to return. Defaults to 10, which is
            the cap this function always applied; pass ``None`` to return
            every block found.

    Returns:
        The question blocks in file order, each with surrounding whitespace
        stripped.

    Raises:
        ValueError: If ``limit`` is negative (a negative slice bound would
            silently drop blocks from the end instead of capping the count).
        OSError: If the file cannot be opened or read.
    """
    if limit is not None and limit < 0:
        raise ValueError("limit must be non-negative or None")

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # DOTALL lets a block span several lines; the lookahead stops the lazy
    # match right before the next numbered line without consuming it.
    pattern = r'(\d+\.\s+【.*?】.*?(?=\n\d+\.|\Z))'
    questions = re.findall(pattern, content, re.DOTALL)

    cleaned_questions = [q.strip() for q in questions]
    return cleaned_questions[:limit]
class KnowledgeGraph:
    """Link one exam question to knowledge and literacy nodes in Neo4j.

    The instance loads the available ``KnowledgePoint`` and ``LiteracyNode``
    IDs from the graph, asks a chat model to emit Cypher that links the
    question to them, and filters that Cypher so only statements referring
    to known IDs survive.

    NOTE(review): the filtered Cypher is still model-generated text (for
    example the ``SET q.content = "..."`` line); callers execute it as-is.
    """

    def __init__(self, content: str):
        """Open the graph and model clients and preload both ID tables.

        Args:
            content: Full text of one question block.
        """
        self.content = content
        # Short deterministic ID: first 8 hex digits of the content's MD5.
        self.question_id = hashlib.md5(content.encode()).hexdigest()[:8]
        self.graph = Graph(NEO4J_URI, auth=NEO4J_AUTH)
        # Load both lookup tables up front.
        self.knowledge_points = self._get_knowledge_points()
        self.literacy_points = self._get_literacy_points()
        print(f"已加载知识点:{len(self.knowledge_points)}个,素养点:{len(self.literacy_points)}")
        self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)

    def _get_knowledge_points(self) -> dict:
        """Return ``{id: name}`` for every KnowledgePoint node.

        Any failure is printed and an empty dict is returned instead.
        """
        try:
            return {row['n.id']: row['n.name']
                    for row in self.graph.run("MATCH (n:KnowledgePoint) RETURN n.id, n.name")}
        except Exception as e:
            print(f"知识点加载失败:{str(e)}")
            return {}

    def _get_literacy_points(self) -> dict:
        """Return ``{value: title}`` for every LiteracyNode node.

        Any failure is printed and an empty dict is returned instead.
        """
        try:
            return {row['n.value']: row['n.title']
                    for row in self.graph.run("MATCH (n:LiteracyNode) RETURN n.value, n.title")}
        except Exception as e:
            print(f"素养点加载失败:{str(e)}")
            return {}

    def _make_prompt(self) -> str:
        """Build the system prompt describing the IDs and the Cypher format.

        NOTE(review): only the first three entries of each table are listed,
        while the prompt requires the model to use the listed IDs - confirm
        whether showing a sample rather than the full table is intended.
        """
        kp_samples = "\n".join(f"{k}: {v}" for k, v in list(self.knowledge_points.items())[:3])
        lp_samples = "\n".join(f"{k}: {v}" for k, v in list(self.literacy_points.items())[:3])
        # The literal below is kept flush-left on purpose: any indentation
        # inside the triple quotes would become part of the prompt text.
        return f"""请分析题目考查的知识点和核心素养:
可用知识点ID:名称):
{kp_samples}
...共{len(self.knowledge_points)}个知识点
可用素养点ID:名称):
{lp_samples}
...共{len(self.literacy_points)}个素养点
生成要求:
1. 必须使用上述ID
2. 按以下格式生成Cypher代码
MERGE (q:Question {{id: "{self.question_id}"}})
SET q.content = "题目内容"
WITH q
MATCH (kp:KnowledgePoint {{id: "知识点ID"}})
MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)
WITH q
MATCH (lp:LiteracyNode {{value: "素养点ID"}})
MERGE (q)-[:RELATES_TO_LITERACY]->(lp)"""

    @staticmethod
    def _index_ids(points: dict) -> dict:
        """Map each upper-cased string ID in *points* to the ID as stored."""
        return {key.upper(): key for key in points if isinstance(key, str)}

    def _clean_cypher(self, code: str) -> str:
        """Filter model output down to Cypher that only uses known IDs.

        Rules applied line by line (blank lines are ignored):

        * A ``MATCH (kp:KnowledgePoint ...`` / ``MATCH (lp:LiteracyNode ...``
          line is kept only when its quoted ID matches a loaded ID, compared
          case-insensitively. The ID is rewritten to the spelling stored in
          the graph, and only the quoted ID itself is touched.
        * When such a MATCH is rejected, later ``MERGE``/``WITH``/``SET``
          lines that reference the bare variable (``(kp)`` or ``(lp)``) are
          dropped until a valid MATCH rebinds it. Without this, the MERGE
          would run with an unbound variable and create a blank node.
        * Other lines are kept only if they start with ``MERGE``, ``WITH``
          or ``SET``; everything else (prose, code fences, other clauses)
          is discarded.
        * Identical consecutive ``WITH`` lines are collapsed and trailing
          ``WITH`` lines removed, since dropping lines can leave them behind
          and a query may not end with ``WITH``.

        Args:
            code: Raw text returned by the model.

        Returns:
            The surviving statements joined with newlines; possibly empty.
        """
        # (variable, marker text, ID pattern, stored IDs, upper-case index)
        rules = (
            ('kp', 'MATCH (kp:KnowledgePoint',
             re.compile(r'\bid\s*:\s*["\'](.*?)["\']'),
             self.knowledge_points, self._index_ids(self.knowledge_points)),
            ('lp', 'MATCH (lp:LiteracyNode',
             re.compile(r'\bvalue\s*:\s*["\'](.*?)["\']'),
             self.literacy_points, self._index_ids(self.literacy_points)),
        )
        cleaned = []
        unbound = set()  # variables whose latest MATCH was rejected

        for raw_line in code.split('\n'):
            line = raw_line.strip()
            if not line:
                continue

            rule = next((r for r in rules if r[1] in line), None)
            if rule is not None:
                var, _marker, id_pattern, points, index = rule
                match = id_pattern.search(line)
                canonical = None
                if match:
                    raw_id = match.group(1)
                    # Prefer an exact hit so IDs differing only by case
                    # are not confused with each other.
                    canonical = raw_id if raw_id in points else index.get(raw_id.upper())
                if canonical is None:
                    unbound.add(var)
                    continue
                unbound.discard(var)
                # Splice by position: str.replace would also rewrite any
                # other occurrence of the ID text elsewhere in the line.
                cleaned.append(line[:match.start(1)] + canonical + line[match.end(1):])
            elif line.startswith(('MERGE', 'WITH', 'SET')):
                if any(re.search(rf'\(\s*{var}\s*\)', line) for var in unbound):
                    continue
                if line.startswith('WITH') and cleaned and cleaned[-1] == line:
                    continue
                cleaned.append(line)

        while cleaned and cleaned[-1].startswith('WITH'):
            cleaned.pop()
        return '\n'.join(cleaned)

    def run(self) -> str:
        """Ask the model to analyse the question and return cleaned Cypher.

        Returns:
            The filtered Cypher text, or ``""`` if the request or the
            clean-up raised (the error is printed).
        """
        try:
            response = self.client.chat.completions.create(
                model=MODEL_NAME,
                messages=[
                    {"role": "system", "content": self._make_prompt()},
                    {"role": "user", "content": f"题目内容:{self.content}"}
                ]
            )
            return self._clean_cypher(response.choices[0].message.content)
        except Exception as e:
            print(f"分析失败:{str(e)}")
            return ""

    def query_relations(self):
        """Fetch the knowledge and literacy nodes linked to this question.

        Returns:
            The result of ``.data()`` on the query: records with the keys
            ``knowledge_id``, ``knowledge_name``, ``literacy_id`` and
            ``literacy_title``.

        NOTE(review): the two OPTIONAL MATCH clauses are independent, so a
        question linked to several nodes of both kinds presumably yields one
        row per (knowledge, literacy) combination - confirm callers expect
        that.
        """
        cypher = f"""
MATCH (q:Question {{id: "{self.question_id}"}})
OPTIONAL MATCH (q)-[:TESTS_KNOWLEDGE]->(kp)
OPTIONAL MATCH (q)-[:RELATES_TO_LITERACY]->(lp)
RETURN
kp.id AS knowledge_id,
kp.name AS knowledge_name,
lp.value AS literacy_id,
lp.title AS literacy_title"""
        return self.graph.run(cypher).data()
# Example usage
if __name__ == '__main__':
    # Split the paper, then analyse each block and print what was linked.
    for number, question_text in enumerate(split_questions('ShiTi.md'), start=1):
        print(f"{number}题块:")
        print("-" * 50)

        analyzer = KnowledgeGraph(question_text)
        generated = analyzer.run()
        if not generated:
            # Nothing usable came back for this question; move on.
            continue

        print("生成的Cypher:\n", generated)
        analyzer.graph.run(generated)

        print("关联结果:")
        for row in analyzer.query_relations():
            print(f"知识点:{row['knowledge_name']} ({row['knowledge_id']})")
            print(f"素养点:{row['literacy_title']} ({row['literacy_id']})")