You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
45 lines
1.6 KiB
45 lines
1.6 KiB
import ijson
|
|
import os
|
|
|
|
from Tools.KG_Config import TOPIC
|
|
|
|
|
|
def parse_relationships(file_path, sample_size=10):
    """Summarize a relationships JSON file without loading it all into memory.

    Two ijson prefixes are read from the file: the top-level
    ``embedding_dim`` value and the elements of the top-level ``data`` array.

    Args:
        file_path: Path of the JSON file to read.
        sample_size: Maximum number of leading ``data`` records to keep as
            samples. Defaults to 10.

    Returns:
        A dict with three keys:
        ``embedding_dim`` - the value found in the file, or ``"未知维度"``
        when the key is absent;
        ``relationship_count`` - number of records in ``data``;
        ``sample_relationships`` - list of the first ``sample_size`` records.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    # Fail early with a readable message when the file is missing.
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"文件不存在: {file_path}")

    # ijson parses bytes, so the file is opened in binary mode; handing it a
    # text-mode file is deprecated and makes it re-encode every chunk.
    with open(file_path, 'rb') as f:
        # Stops at the first match, so the rest of the file is not parsed here.
        embedding_dim = next(ijson.items(f, 'embedding_dim'), "未知维度")

    # Stream the records: only the leading samples are retained, the rest are
    # merely counted, so memory use stays flat however large the file is.
    sample_relationships = []
    relationship_count = 0
    with open(file_path, 'rb') as f:
        for record in ijson.items(f, 'data.item'):
            if relationship_count < sample_size:
                sample_relationships.append(record)
            relationship_count += 1

    return {
        "embedding_dim": embedding_dim,
        "relationship_count": relationship_count,
        "sample_relationships": sample_relationships,
    }
|
|
|
|
if __name__ == "__main__":
    # Script entry point: print a short summary of the relationships file.
    # The path is built with os.path.join rather than a hard-coded backslash
    # so it is also valid on non-Windows systems.
    # NOTE(review): presumably TOPIC is a directory path - confirm in Tools.KG_Config.
    file_path = os.path.join(TOPIC, "vdb_relationships.json")
    try:
        summary = parse_relationships(file_path)
        print(f"嵌入维度: {summary['embedding_dim']}")
        print(f"关系总数: {summary['relationship_count']}")
        print("\n示例关系:\n")
        for i, rel in enumerate(summary['sample_relationships'], 1):
            print(f"关系 {i}:")
            print(f" ID: {rel['__id__']}")
            print(f" 源实体: {rel['src_id']}")
            print(f" 目标实体: {rel['tgt_id']}")
            # Only the first 50 characters of the description are shown.
            print(f" 关系描述: {rel['content'][:50]}...")
            print(f" 来源块ID: {rel['source_id']}\n")
    except Exception as e:
        # Top-level boundary: report any failure (missing file, malformed
        # JSON, missing record key) instead of printing a traceback.
        print(f"解析错误: {e}")