main
HuangHai 2 weeks ago
parent beedccf163
commit 09fd16e4b2

@ -6,15 +6,15 @@ EMBED_DIM = 1024
EMBED_MAX_TOKEN_SIZE = 8192
# 大模型 【DeepSeek深度求索官方】
LLM_API_KEY="sk-44ae895eeb614aa1a9c6460579e322f1"
LLM_API_KEY = "sk-44ae895eeb614aa1a9c6460579e322f1"
LLM_BASE_URL = "https://api.deepseek.com"
LLM_MODEL_NAME = "deepseek-chat"
# 阿里云提供的大模型服务
#LLM_API_KEY="sk-f6da0c787eff4b0389e4ad03a35a911f"
#LLM_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
#LLM_MODEL_NAME = "qwen-plus" # 不要使用通义千问,会导致化学方程式不正确!
#LLM_MODEL_NAME = "deepseek-v3"
# LLM_API_KEY="sk-f6da0c787eff4b0389e4ad03a35a911f"
# LLM_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
# LLM_MODEL_NAME = "qwen-plus" # 不要使用通义千问,会导致化学方程式不正确!
# LLM_MODEL_NAME = "deepseek-v3"
# 视觉模型
VISION_API_KEY = "sk-pbqibyjwhrgmnlsmdygplahextfaclgnedetybccknxojlyl"
@ -23,10 +23,11 @@ VISION_MODEL_NAME = "GLM-4.1V-9B-Thinking"
# Neo4j
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USERNAME="neo4j"
NEO4J_PASSWORD="DsideaL147258369"
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = "DsideaL147258369"
NEO4J_AUTH = (NEO4J_USERNAME, NEO4J_PASSWORD)
# 写入Neo4j的批次大小
BATCH_SIZE_NODES=100
BATCH_SIZE_NODES = 100
# 写入Neo4j的批次大小
BATCH_SIZE_EDGES=100
BATCH_SIZE_EDGES = 100

@ -4,10 +4,10 @@ import json # 用于JSON数据处理
import xml.etree.ElementTree as ET # 用于解析XML文件
from neo4j import GraphDatabase # Neo4j数据库驱动
from Config.Config import * # 导入配置文件
from Util.Neo4jExecutor import Neo4jExecutor
# 动画学院
WORKING_DIR = "./Topic/DongHua"
#TXT_FILE = "sushi.txt"
def xml_to_json(xml_file):
"""
@ -113,7 +113,13 @@ def process_in_batches(tx, query, data, batch_size):
tx.run(query, {"nodes": batch} if "nodes" in query else {"edges": batch})
def main():
if __name__ == "__main__":
# 创建Neo4jExecutor实例
executor = Neo4jExecutor.create_default()
executor.graph.run("MATCH (n) DETACH DELETE n")
print("清库成功")
# 文件路径设置
xml_file = os.path.join(WORKING_DIR, "graph_chunk_entity_relation.graphml")
json_file = os.path.join(WORKING_DIR, "graph_data.json")
@ -121,7 +127,7 @@ def main():
# 将XML转换为JSON
json_data = convert_xml_to_json(xml_file, json_file)
if json_data is None:
return
exit(0)
# 加载节点和边数据
nodes = json_data.get("nodes", [])
@ -129,47 +135,47 @@ def main():
# Neo4j查询语句
create_nodes_query = """
UNWIND $nodes AS node
MERGE (e:Entity {id: node.id})
SET e.entity_type = node.entity_type,
e.description = node.description,
e.source_id = node.source_id,
e.displayName = node.id
REMOVE e:Entity
WITH e, node
CALL apoc.create.addLabels(e, [node.id]) YIELD node AS labeledNode
RETURN count(*)
"""
UNWIND $nodes AS node
MERGE (e:Entity {id: node.id})
SET e.entity_type = node.entity_type,
e.description = node.description,
e.source_id = node.source_id,
e.displayName = node.id
REMOVE e:Entity
WITH e, node
CALL apoc.create.addLabels(e, [node.id]) YIELD node AS labeledNode
RETURN count(*)
"""
create_edges_query = """
UNWIND $edges AS edge
MATCH (source {id: edge.source})
MATCH (target {id: edge.target})
WITH source, target, edge,
CASE
WHEN edge.keywords CONTAINS 'lead' THEN 'lead'
WHEN edge.keywords CONTAINS 'participate' THEN 'participate'
WHEN edge.keywords CONTAINS 'uses' THEN 'uses'
WHEN edge.keywords CONTAINS 'located' THEN 'located'
WHEN edge.keywords CONTAINS 'occurs' THEN 'occurs'
ELSE REPLACE(SPLIT(edge.keywords, ',')[0], '"', '')
END AS relType
CALL apoc.create.relationship(source, relType, {
weight: edge.weight,
description: edge.description,
keywords: edge.keywords,
source_id: edge.source_id
}, target) YIELD rel
RETURN count(*)
"""
UNWIND $edges AS edge
MATCH (source {id: edge.source})
MATCH (target {id: edge.target})
WITH source, target, edge,
CASE
WHEN edge.keywords CONTAINS 'lead' THEN 'lead'
WHEN edge.keywords CONTAINS 'participate' THEN 'participate'
WHEN edge.keywords CONTAINS 'uses' THEN 'uses'
WHEN edge.keywords CONTAINS 'located' THEN 'located'
WHEN edge.keywords CONTAINS 'occurs' THEN 'occurs'
ELSE REPLACE(SPLIT(edge.keywords, ',')[0], '"', '')
END AS relType
CALL apoc.create.relationship(source, relType, {
weight: edge.weight,
description: edge.description,
keywords: edge.keywords,
source_id: edge.source_id
}, target) YIELD rel
RETURN count(*)
"""
set_displayname_and_labels_query = """
MATCH (n)
SET n.displayName = n.id
WITH n
CALL apoc.create.setLabels(n, [n.entity_type]) YIELD node
RETURN count(*)
"""
MATCH (n)
SET n.displayName = n.id
WITH n
CALL apoc.create.setLabels(n, [n.entity_type]) YIELD node
RETURN count(*)
"""
# 创建Neo4j驱动连接
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))
@ -196,7 +202,3 @@ def main():
finally:
# 关闭数据库连接
driver.close()
if __name__ == "__main__":
main()

@ -0,0 +1,109 @@
import re
from typing import List, Tuple
from py2neo import Graph, Node, Relationship, Subgraph
from Config import Config
def clear(db):
# 清空数据
db.run("MATCH (n) DETACH DELETE n")
# 分步删除约束和索引
try:
# 删除约束
constraints = db.run("SHOW CONSTRAINTS YIELD name").data()
for constr in constraints:
db.run(f"DROP CONSTRAINT `{constr['name']}`")
# 删除索引
indexes = db.run("SHOW INDEXES YIELD name, type WHERE type <> 'LOOKUP'").data()
for idx in indexes:
db.run(f"DROP INDEX `{idx['name']}`")
except Exception as e:
print(f"删除操作失败: {e}")
def create_subgraph(db: Graph, nodes: List[Node], relations: List[Tuple[Node, str, Node]]) -> None:
"""统一创建子图"""
subgraph = Subgraph(
nodes=nodes,
relationships=[Relationship(start, rel_type, end) for start, rel_type, end in relations]
)
db.create(subgraph)
def tx_create(db: Graph, nodes: List[Node], relations: List[Tuple[Node, str, Node]]) -> None:
"""事务方式创建数据"""
try:
tx = db.begin()
subgraph = Subgraph(
nodes=nodes,
relationships=[Relationship(start, rel_type, end) for start, rel_type, end in relations]
)
tx.create(subgraph)
db.commit(tx)
except Exception as e:
db.rollback(tx)
print(f"事务操作失败: {str(e)}")
raise
class Neo4jExecutor:
# 添加类变量存储连接配置
NEO4J_URI = Config.NEO4J_URI
NEO4J_AUTH = Config.NEO4J_AUTH
def __init__(self, uri=None, auth=None):
# 使用默认配置或传入参数
self.graph = Graph(uri or self.NEO4J_URI,
auth=auth or self.NEO4J_AUTH)
@classmethod
def create_default(cls):
"""使用默认配置创建执行器"""
return cls(cls.NEO4J_URI, cls.NEO4J_AUTH)
# 新增文本执行方法
def execute_cypher_text(self, cypher_text: str) -> dict:
"""直接执行Cypher文本"""
stats = {'total': 0, 'success': 0, 'failed': 0}
try:
statements = re.split(r';\s*\n', cypher_text)
statements = [s.strip() for s in statements if s.strip()]
stats['total'] = len(statements)
for stmt in statements:
try:
self.graph.run(stmt)
stats['success'] += 1
except Exception as e:
stats['failed'] += 1
print(f"执行失败: {stmt[:50]}... \n错误: {str(e)[:100]}")
return stats
except Exception as e:
print(f"执行失败: {stmt[:100]}... \n完整错误: {str(e)}") # 原为str(e)[:100]
return stats
def execute_cypher_file(self, file_path: str) -> dict: # 确保方法名称正确
"""执行Cypher文件"""
stats = {'total': 0, 'success': 0, 'failed': 0}
try:
with open(file_path, 'r', encoding='utf-8') as f:
cypher_script = f.read()
statements = re.split(r';\s*\n', cypher_script)
statements = [s.strip() for s in statements if s.strip()]
stats['total'] = len(statements)
for stmt in statements:
try:
self.graph.run(stmt)
stats['success'] += 1
except Exception as e:
stats['failed'] += 1
print(f"执行失败: {stmt[:50]}... \n错误: {str(e)[:100]}")
return stats
except Exception as e:
print(f"文件错误: {str(e)}")
return stats
Loading…
Cancel
Save