|
|
|
@ -8,6 +8,7 @@ from openai import OpenAI
|
|
|
|
|
from openai.types.chat import ChatCompletionChunk
|
|
|
|
|
from K2_Neo4jExecutor import *
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class KnowledgeGraph:
|
|
|
|
|
def __init__(self, content: str):
|
|
|
|
|
self.content = content
|
|
|
|
@ -18,18 +19,19 @@ class KnowledgeGraph:
|
|
|
|
|
self.client = OpenAI(api_key=MODEL_API_KEY, base_url=MODEL_API_URL)
|
|
|
|
|
|
|
|
|
|
def _validate_ids(self, line: str) -> bool:
|
|
|
|
|
"""修正后的ID验证(改为大小写不敏感)"""
|
|
|
|
|
# 使用更灵活的正则表达式
|
|
|
|
|
"""修正后的ID验证(严格匹配数据库格式)"""
|
|
|
|
|
# 调整正则表达式匹配6位小写hex格式
|
|
|
|
|
found_ids = {
|
|
|
|
|
'kp': set(re.findall(r'(?i)(kp_[\da-f]{6})', line)),
|
|
|
|
|
'ab': set(re.findall(r'(?i)(ab_[\da-f]{6})', line))
|
|
|
|
|
'kp': set(re.findall(r'(kp_[a-f0-9]{6})', line.lower())),
|
|
|
|
|
'ab': set(re.findall(r'(ab_[a-f0-9]{6})', line.lower()))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# 转换为小写统一比较
|
|
|
|
|
valid_kp = all(kp.lower() in self.existing_knowledge for kp in found_ids['kp'])
|
|
|
|
|
valid_ab = all(ab.lower() in self.existing_ability for ab in found_ids['ab'])
|
|
|
|
|
# 直接检查小写形式
|
|
|
|
|
valid_kp = all(kp in self.existing_knowledge for kp in found_ids['kp'])
|
|
|
|
|
valid_ab = all(ab in self.existing_ability for ab in found_ids['ab'])
|
|
|
|
|
|
|
|
|
|
return valid_kp and valid_ab
|
|
|
|
|
|
|
|
|
|
def _init_graph_connection(self) -> Graph:
|
|
|
|
|
"""初始化并测试数据库连接"""
|
|
|
|
|
try:
|
|
|
|
@ -56,6 +58,7 @@ class KnowledgeGraph:
|
|
|
|
|
has_merge = re.search(r'\bMERGE\s*\(q:Question\b', cypher, re.IGNORECASE)
|
|
|
|
|
has_with = re.search(r'\bWITH\s+q\b', cypher, re.IGNORECASE)
|
|
|
|
|
return not has_merge or (has_merge and has_with)
|
|
|
|
|
|
|
|
|
|
def _generate_question_id(self) -> str:
|
|
|
|
|
"""生成题目唯一标识符"""
|
|
|
|
|
return hashlib.md5(self.content.encode()).hexdigest()[:8]
|
|
|
|
@ -71,22 +74,25 @@ class KnowledgeGraph:
|
|
|
|
|
return {}
|
|
|
|
|
|
|
|
|
|
def _generate_stream(self) -> Iterator[ChatCompletionChunk]:
|
|
|
|
|
"""生成限制性提示词"""
|
|
|
|
|
# 修改提示词中的Cypher示例部分
|
|
|
|
|
system_prompt = f'''
|
|
|
|
|
将题目中涉及到的小学数学知识点、能力点进行总结,并且按照以下格式生成在neo4j-community-5.26.2上的语句:
|
|
|
|
|
// 在示例中强调WITH的必要性
|
|
|
|
|
"""生成限制性提示词(添加现有节点示例)"""
|
|
|
|
|
# 在提示词中添加现有节点示例
|
|
|
|
|
knowledge_samples = '\n'.join([f"KP_{k[3:]}: {v}" for k, v in list(self.existing_knowledge.items())[:5]])
|
|
|
|
|
ability_samples = '\n'.join([f"AB_{a[3:]}: {v}" for a, v in list(self.existing_ability.items())[:5]])
|
|
|
|
|
|
|
|
|
|
system_prompt = f''' 将题目中涉及到的小学数学知识点、能力点进行总结,并且按照以下格式生成在neo4j-community-5.26.2上的语句:
|
|
|
|
|
请严格使用以下已有节点ID(不要创建新ID):
|
|
|
|
|
现有知识点示例(KP_后接6位小写字母/数字):
|
|
|
|
|
{knowledge_samples}
|
|
|
|
|
现有能力点示例(AB_后接6位小写字母/数字):
|
|
|
|
|
{ability_samples}
|
|
|
|
|
生成格式要求:
|
|
|
|
|
MERGE (q:Question {{id: "{self.question_id}"}})
|
|
|
|
|
SET q.content = "..."
|
|
|
|
|
|
|
|
|
|
// 必须使用WITH传递上下文
|
|
|
|
|
WITH q
|
|
|
|
|
MATCH (kp:KnowledgePoint {{id: "KP_xxxxxx"}})
|
|
|
|
|
WITH q
|
|
|
|
|
MATCH (kp:KnowledgePoint {{id: "KP_xxxxxx"}}) // 必须使用已有KP_ID
|
|
|
|
|
MERGE (q)-[:TESTS_KNOWLEDGE]->(kp)
|
|
|
|
|
|
|
|
|
|
// 多个关系需要继续使用WITH
|
|
|
|
|
WITH q
|
|
|
|
|
MATCH (ab:AbilityPoint {{id: "AB_xxxxxx"}})
|
|
|
|
|
WITH q
|
|
|
|
|
MATCH (ab:AbilityPoint {{id: "AB_xxxxxx"}}) // 必须使用已有AB_ID
|
|
|
|
|
MERGE (q)-[:REQUIRES_ABILITY]->(ab)
|
|
|
|
|
'''
|
|
|
|
|
|
|
|
|
@ -146,12 +152,13 @@ class KnowledgeGraph:
|
|
|
|
|
filter_reason.append("权重值非法")
|
|
|
|
|
|
|
|
|
|
if filter_reason:
|
|
|
|
|
print(f"第{line_num}行被过滤: {raw_line[:50]}... | 原因: {', '.join(filter_reason)}")
|
|
|
|
|
print(f"第{line_num}行被过滤: {line[:50]}... | 原因: {', '.join(filter_reason)}")
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
valid_lines.append(line)
|
|
|
|
|
|
|
|
|
|
return '\n'.join(valid_lines)
|
|
|
|
|
|
|
|
|
|
def run(self) -> Tuple[bool, str, str]:
|
|
|
|
|
"""执行安全生成流程"""
|
|
|
|
|
if not self.existing_knowledge or not self.existing_ability:
|
|
|
|
@ -179,13 +186,11 @@ class KnowledgeGraph:
|
|
|
|
|
|
|
|
|
|
if content_buffer:
|
|
|
|
|
full_content = ''.join(content_buffer)
|
|
|
|
|
cypher_script = self._extract_cypher(full_content)
|
|
|
|
|
|
|
|
|
|
print(f"\n\n✅ 生成完成!耗时 {int(time.time() - start_time)}秒")
|
|
|
|
|
return True, full_content, cypher_script
|
|
|
|
|
return True, full_content
|
|
|
|
|
|
|
|
|
|
print("\n⚠️ 生成完成但未获取到有效内容")
|
|
|
|
|
return False, "空内容", ""
|
|
|
|
|
return False, "空内容"
|
|
|
|
|
|
|
|
|
|
# 修改run方法中的异常处理
|
|
|
|
|
except Exception as e:
|
|
|
|
@ -208,7 +213,7 @@ if __name__ == '__main__':
|
|
|
|
|
'''
|
|
|
|
|
try:
|
|
|
|
|
kg = KnowledgeGraph(test_content)
|
|
|
|
|
success, result, cypher = kg.run()
|
|
|
|
|
success, cypher = kg.run()
|
|
|
|
|
res = executor.execute_cypher_text(cypher)
|
|
|
|
|
print("恭喜,执行数据插入完成!")
|
|
|
|
|
except Exception as e:
|
|
|
|
|