You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

155 lines
6.0 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# -*- coding: utf-8 -*-
import re
import time
import hashlib
from typing import Iterator, Tuple
from openai import OpenAI
from openai.types.chat import ChatCompletionChunk
from Config import *
class KnowledgeGraph:
    """Ask a chat model for Neo4j Cypher describing one exam question.

    The instance keeps the question text, an OpenAI-compatible client built
    from the ``MODEL_API_KEY`` / ``MODEL_API_URL`` settings, and a short
    question id derived from the text.  ``run()`` streams the model answer,
    echoes it to stdout and extracts the Cypher found in fenced code blocks.
    """

    # Frames shown while waiting for the first streamed token.
    _SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")

    # One fenced Markdown block: group 1 is the info string after the opening
    # fence (language tag, possibly empty), group 2 is the block body.
    # Matching *every* fenced block (not only cypher ones) keeps opening and
    # closing fences paired, so a ```python block cannot shift the matcher
    # onto the prose that sits between two blocks.
    _FENCE_RE = re.compile(r"```([^\n`]*)\n(.*?)```", re.DOTALL)

    def __init__(self, content: str) -> None:
        """Store the question text, build the API client and derive the id.

        Args:
            content: Full text of the question to analyse.
        """
        self.content = content
        self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)
        self.question_id = self._generate_question_id()

    def _generate_question_id(self) -> str:
        """Return the first 8 hex digits of the MD5 of the question text.

        MD5 is used only as a short content fingerprint here, not for
        anything security related.
        """
        return hashlib.md5(self.content.encode()).hexdigest()[:8]

    def _generate_stream(self) -> "Iterator[ChatCompletionChunk]":
        """Start a streaming chat completion with the question-specific prompt.

        The system prompt embeds the generated question id and the first 50
        characters of the question; the full question is sent as the user
        message.

        Returns:
            Whatever ``client.chat.completions.create(..., stream=True)``
            returns; ``run()`` iterates it chunk by chunk.
        """
        # NOTE: the prompt body is runtime text sent to the model and is kept
        # flush-left on purpose so that no indentation leaks into it.
        system_prompt = f'''请根据题目内容生成Neo4j Cypher语句严格遵循以下规则
# 节点创建规范
1. 知识点节点:
- 标签: KnowledgePoint
- 必须属性:
* id: "KP_" + 知识点名称的MD5前6位示例name="分数运算" → id="KP_ae3b8c"
* name: 知识点名称(从题目内容中提取)
* level: 学段(小学/初中/高中)
2. 能力点节点:
- 标签: AbilityPoint
- 必须属性:
* id: "AB_" + 能力名称的MD5前6位
* name: 能力点名称
* category: 能力类型(计算/推理/空间想象等)
3. 题目节点:
- 标签: Question
- 必须属性:
* id: "{self.question_id}"(已根据题目内容生成)
* content: 题目文本摘要50字内
* difficulty: 难度系数1-5整数
# 关系规则
1. 题目与知识点关系:
(q:Question)-[:TESTS_KNOWLEDGE]->(kp:KnowledgePoint)
需设置权重属性 weight0.1-1.0
2. 题目与能力点关系:
(q:Question)-[:REQUIRES_ABILITY]->(ab:AbilityPoint)
需设置权重属性 weight
# 生成步骤
1. 先创建约束(必须):
CREATE CONSTRAINT IF NOT EXISTS FOR (kp:KnowledgePoint) REQUIRE kp.id IS UNIQUE;
CREATE CONSTRAINT IF NOT EXISTS FOR (ab:AbilityPoint) REQUIRE ab.id IS UNIQUE;
2. 使用MERGE创建节点禁止使用CREATE
3. 最后创建关系需先MATCH已存在节点
# 当前题目信息
- 生成的问题ID: {self.question_id}
- 题目内容: "{self.content[:50]}..."(已截断)'''
        return self.client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": self.content},
            ],
            stream=True,
            timeout=300,
        )

    @staticmethod
    def _strip_line_comment(line: str) -> str:
        """Drop a trailing ``//`` comment unless the ``//`` is inside quotes.

        Tracks single quotes, double quotes and backticks on a single line
        (with backslash escapes), so values such as ``"http://host"`` are
        kept intact.  Strings spanning several lines are not tracked.
        """
        open_quote = None
        pos = 0
        length = len(line)
        while pos < length:
            char = line[pos]
            if open_quote is not None:
                if char == "\\":
                    # Skip the escaped character so \" does not end the string.
                    pos += 2
                    continue
                if char == open_quote:
                    open_quote = None
            elif char in "'\"`":
                open_quote = char
            elif line.startswith("//", pos):
                return line[:pos]
            pos += 1
        return line

    def _extract_cypher(self, content: str) -> str:
        """Collect the Cypher found in fenced code blocks of ``content``.

        Only blocks whose language tag is empty or ``cypher`` (any case) are
        used; blocks tagged with another language are skipped.  Inside a
        block, blank lines and ``//`` comments are removed.

        Args:
            content: Complete model answer (Markdown).

        Returns:
            Lines of a block joined by ``"\\n"``, blocks joined by
            ``";\\n\\n"``.  Empty string when no usable block exists.
        """
        blocks = []
        for info, body in self._FENCE_RE.findall(content):
            words = info.split()
            tag = words[0].lower() if words else ""
            if tag not in ("", "cypher"):
                continue
            kept = []
            # splitlines() also copes with "\r\n" line endings.
            for raw_line in body.splitlines():
                code = self._strip_line_comment(raw_line).strip()
                if code:
                    kept.append(code)
            if kept:
                blocks.append("\n".join(kept))

        if not blocks:
            return ""

        # The separator supplies the ';' between blocks, so remove one that a
        # block already ends with; otherwise the script would contain ';;'.
        leading = []
        for block in blocks[:-1]:
            trimmed = block.rstrip(";").rstrip()
            if trimmed:
                leading.append(trimmed)
        return ";\n\n".join(leading + blocks[-1:])

    def run(self) -> Tuple[bool, str, str]:
        """Stream the model answer, echo it, and extract the Cypher script.

        Every path returns a 3-tuple; exceptions raised while requesting or
        reading the stream are reported and converted into a failure result.

        Returns:
            ``(True, full_answer, cypher_script)`` when any content arrived
            (``cypher_script`` may be empty), otherwise
            ``(False, reason, "")``.
        """
        start_time = time.time()
        frames = self._SPINNER_FRAMES
        content_buffer = []
        try:
            print("🚀 开始生成知识点和能力点的总结和插入语句")
            stream = self._generate_stream()
            # Guard against a missing stream object.
            if not stream:
                print("\n❌ 生成失败:无法获取生成流")
                return False, "生成流获取失败", ""

            for idx, chunk in enumerate(stream):
                if not content_buffer:
                    # Show the waiting indicator only until text starts to
                    # arrive; afterwards its '\r' would overwrite the answer
                    # being echoed on the same terminal line.
                    elapsed = int(time.time() - start_time)
                    print(
                        f"\r{frames[idx % len(frames)]} 生成中({elapsed}秒)",
                        end="",
                        flush=True,
                    )
                if chunk.choices and chunk.choices[0].delta.content:
                    content_chunk = chunk.choices[0].delta.content
                    content_buffer.append(content_chunk)
                    if len(content_buffer) == 1:
                        print("\n\n📝 内容生成开始:")
                    print(content_chunk, end="", flush=True)

            if content_buffer:
                full_content = ''.join(content_buffer)
                cypher_script = self._extract_cypher(full_content)
                print(f"\n\n✅ 生成成功!耗时 {int(time.time() - start_time)}")
                print("\n================ 完整结果 ================")
                print(full_content)
                print("\n================ Cypher语句 ===============")
                print(cypher_script if cypher_script else "未检测到Cypher语句")
                print("==========================================")
                return True, full_content, cypher_script

            # The stream ended without a single content chunk.
            print("\n⚠️ 生成完成但未获取到有效内容")
            return False, "空内容", ""
        except Exception as e:
            # Top-level boundary: report the error and return a failure tuple
            # instead of letting the script crash.
            print(f"\n\n❌ 生成失败:{str(e)}")
            return False, str(e), ""
if __name__ == '__main__':
    # Demo run: analyse one sample primary-school maths question and, when
    # the model produced Cypher, save it next to the script.
    sample_question = (
        "\n"
        "下面是一道小学三年级的数学题目,巧求周长:\n"
        "把7个完全相同的小长方形拼成如图的样子已知每个小长方形的长是10厘米则拼成的大长方形的周长是多少厘米\n"
    )
    graph = KnowledgeGraph(sample_question)
    ok, _full_answer, script = graph.run()

    # Write the file only for a successful run that actually yielded Cypher.
    if ok and script:
        output_name = "knowledge_graph.cypher"
        with open(output_name, "w", encoding="utf-8") as out_file:
            out_file.write(script)
        print(f"\nCypher语句已保存至 {output_name} (题目ID: {graph.question_id})")