You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

155 lines
6.0 KiB

5 months ago
# -*- coding: utf-8 -*-
5 months ago
import re
5 months ago
import time
5 months ago
import hashlib
5 months ago
from typing import Iterator, Tuple
5 months ago
from openai import OpenAI
from openai.types.chat import ChatCompletionChunk
5 months ago
from Config import *
5 months ago
class KnowledgeGraph:
    """Ask a chat-completion model for Neo4j Cypher describing one question.

    The instance keeps the raw question text, an OpenAI-compatible client and
    a short content-derived question id.  ``run()`` streams the model answer,
    echoes it to stdout and extracts the Cypher found in fenced code blocks.

    NOTE(review): ``MODEL_API_KEY``, ``MODEL_API_URL`` and ``MODEL_NAME`` are
    not defined in this class; presumably they come from the wildcard
    ``Config`` import -- confirm.
    """

    def __init__(self, content: str):
        """Store the question text, build the API client and derive the id.

        Args:
            content: Full text of the question to analyse.
        """
        self.content = content
        self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)
        self.question_id = self._generate_question_id()

    def _generate_question_id(self) -> str:
        """Return the first 8 hex digits of the MD5 digest of ``self.content``.

        The id is used as an identifier only (it is embedded in the prompt),
        not for any security purpose.
        """
        return hashlib.md5(self.content.encode()).hexdigest()[:8]

    # The return annotation is quoted so it is not evaluated when the class
    # body executes; the runtime behaviour is unchanged.
    def _generate_stream(self) -> "Iterator[ChatCompletionChunk]":
        """Start a streaming chat completion for the stored question.

        The system prompt embeds ``self.question_id`` and the first 50
        characters of ``self.content``; the full content is sent as the user
        message.

        Returns:
            Whatever ``client.chat.completions.create(..., stream=True)``
            returns; ``run()`` iterates over it chunk by chunk.
        """
        # The literal below is part of the request sent to the model: its
        # lines are deliberately flush-left so no indentation leaks into the
        # prompt text.
        system_prompt = f'''请根据题目内容生成Neo4j Cypher语句严格遵循以下规则
# 节点创建规范
1. 知识点节点
- 标签: KnowledgePoint
- 必须属性:
* id: "KP_" + 知识点名称的MD5前6位示例name="分数运算" id="KP_ae3b8c"
* name: 知识点名称从题目内容中提取
* level: 学段小学/初中/高中
2. 能力点节点
- 标签: AbilityPoint
- 必须属性:
* id: "AB_" + 能力名称的MD5前6位
* name: 能力点名称
* category: 能力类型计算/推理/空间想象等
3. 题目节点
- 标签: Question
- 必须属性:
* id: "{self.question_id}"已根据题目内容生成
* content: 题目文本摘要50字内
* difficulty: 难度系数1-5整数
# 关系规则
1. 题目与知识点关系
(q:Question)-[:TESTS_KNOWLEDGE]->(kp:KnowledgePoint)
需设置权重属性 weight0.1-1.0
2. 题目与能力点关系
(q:Question)-[:REQUIRES_ABILITY]->(ab:AbilityPoint)
需设置权重属性 weight
# 生成步骤
1. 先创建约束必须
CREATE CONSTRAINT IF NOT EXISTS FOR (kp:KnowledgePoint) REQUIRE kp.id IS UNIQUE;
CREATE CONSTRAINT IF NOT EXISTS FOR (ab:AbilityPoint) REQUIRE ab.id IS UNIQUE;
2. 使用MERGE创建节点禁止使用CREATE
3. 最后创建关系需先MATCH已存在节点
# 当前题目信息
- 生成的问题ID: {self.question_id}
- 题目内容: "{self.content[:50]}..."已截断'''

        return self.client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": self.content},
            ],
            stream=True,
            timeout=300,
        )

    @staticmethod
    def _strip_line_comment(line: str) -> str:
        """Return *line* stripped, without a trailing ``//`` comment.

        A ``//`` that sits inside a single-quoted, double-quoted or
        backtick-quoted span is kept, so values such as ``"http://host"``
        survive.  Quote tracking is per line only.
        """
        open_quote = None
        pos = 0
        while pos < len(line):
            char = line[pos]
            if open_quote is not None:
                if char == "\\" and open_quote != "`":
                    # Skip the escaped character inside a string literal.
                    pos += 2
                    continue
                if char == open_quote:
                    open_quote = None
            elif char in "'\"`":
                open_quote = char
            elif line.startswith("//", pos):
                return line[:pos].strip()
            pos += 1
        return line.strip()

    def _extract_cypher(self, content: str) -> str:
        """Collect the Cypher found in fenced code blocks of *content*.

        Fences are paired in order; a block is kept when its language tag is
        empty or ``cypher`` (case-insensitive), so blocks tagged with another
        language are skipped instead of shifting the pairing.  Blank lines and
        ``//`` comments are removed from every kept block.

        Args:
            content: Full model answer (markdown-like text).

        Returns:
            The cleaned blocks separated by a blank line.  Every block but the
            last is terminated with ``;`` (added only when missing, so no
            ``;;`` is produced).  Empty string when nothing was found.
        """
        fence_pattern = re.compile(r"```([^\n`]*)\n(.*?)```", re.DOTALL)

        cypher_blocks = []
        for tag, body in fence_pattern.findall(content):
            # strip() also drops a trailing "\r" from CRLF input.
            if tag.strip().lower() not in ("", "cypher"):
                continue
            statements = []
            for raw_line in body.split("\n"):
                code = self._strip_line_comment(raw_line)
                if code:
                    statements.append(code)
            if statements:
                cypher_blocks.append("\n".join(statements))

        if not cypher_blocks:
            return ""

        *leading, final = cypher_blocks
        terminated = [
            block if block.endswith(";") else block + ";"
            for block in leading
        ]
        return "\n\n".join([*terminated, final])

    def run(self) -> Tuple[bool, str, str]:
        """Run the generation, echoing progress and results to stdout.

        Returns:
            ``(True, full_text, cypher_script)`` when any content was
            streamed (``cypher_script`` may be empty if no code block was
            found); ``(False, reason, "")`` when the stream is falsy, nothing
            was streamed, or an exception was raised (``reason`` is then
            ``str(exc)``).  Exceptions are reported, never propagated.
        """
        start_time = time.time()
        # NOTE(review): all ten frames are empty strings in this file, so the
        # progress line shows no glyph -- possibly lost characters; confirm.
        spinner = ['', '', '', '', '', '', '', '', '', '']
        content_buffer = []
        try:
            print("🚀 开始生成知识点和能力点的总结和插入语句")
            stream = self._generate_stream()

            if not stream:
                print("\n❌ 生成失败：无法获取生成流")
                return False, "生成流获取失败", ""

            for idx, chunk in enumerate(stream):
                piece = chunk.choices[0].delta.content if chunk.choices else None
                if piece:
                    content_buffer.append(piece)
                    if len(content_buffer) == 1:
                        print("\n\n📝 内容生成开始:")
                    print(piece, end="", flush=True)
                elif not content_buffer:
                    # Show the "\r" progress line only while waiting for the
                    # first content: printing it later would overwrite the
                    # text being streamed on the same terminal line.
                    frame = spinner[idx % len(spinner)]
                    elapsed = int(time.time() - start_time)
                    print(f"\r{frame} 生成中({elapsed}秒)", end="", flush=True)

            if content_buffer:
                full_content = ''.join(content_buffer)
                cypher_script = self._extract_cypher(full_content)
                print(f"\n\n✅ 生成成功！耗时 {int(time.time() - start_time)}")
                print("\n================ 完整结果 ================")
                print(full_content)
                print("\n================ Cypher语句 ===============")
                print(cypher_script if cypher_script else "未检测到Cypher语句")
                print("==========================================")
                return True, full_content, cypher_script

            # The stream ended without a single content chunk.
            print("\n⚠️ 生成完成但未获取到有效内容")
            return False, "空内容", ""
        except Exception as e:
            # Deliberate catch-all boundary: callers always get a result tuple.
            print(f"\n\n❌ 生成失败：{str(e)}")
            return False, str(e), ""
5 months ago
if __name__ == '__main__':
    # Demo run: feed one sample question through the generator and, when a
    # Cypher script was produced, save it next to the script.
    # The literal is flush-left so its text is passed to the model unchanged.
    sample_question = '''
下面是一道小学三年级的数学题目巧求周长
把7个完全相同的小长方形拼成如图的样子已知每个小长方形的长是10厘米则拼成的大长方形的周长是多少厘米
'''
    output_path = "knowledge_graph.cypher"

    graph = KnowledgeGraph(sample_question)
    succeeded, _full_text, cypher_script = graph.run()

    # Write the file only when generation succeeded AND a non-empty script
    # was extracted from the answer.
    if succeeded:
        if cypher_script:
            with open(output_path, "w", encoding="utf-8") as out_file:
                out_file.write(cypher_script)
            print(f"\nCypher语句已保存至 {output_path} (题目ID: {graph.question_id})")