main
HuangHai 5 days ago
parent 8878361126
commit 153b2dadc1

@ -1,17 +1,13 @@
import json import ijson
import os
from Tools.KG_Config import TOPIC from Tools.KG_Config import TOPIC
# 文件路径 # 文件路径
file_path = rf"d:\dsWork\dsProject\dsLightRag\Topic\{TOPIC}\kv_store_doc_status.json" file_path = rf"d:\dsWork\dsProject\dsLightRag\Topic\{TOPIC}\kv_store_doc_status.json"
# 读取并解析JSON文件 # 使用ijson流式读取JSON文件
with open(file_path, 'r', encoding='utf-8') as f: with open(file_path, 'r', encoding='utf-8') as f:
doc_status_data = json.load(f) # 流式迭代文档ID和状态信息
for doc_id, status_info in ijson.kvitems(f, ''):
# 遍历文档状态信息
for doc_id, status_info in doc_status_data.items():
print(f"文档ID: {doc_id}") print(f"文档ID: {doc_id}")
print(f"状态: {status_info['status']}") print(f"状态: {status_info['status']}")
print(f"分块数量: {status_info['chunks_count']}") print(f"分块数量: {status_info['chunks_count']}")

@ -1,20 +1,22 @@
import json import ijson
import os
from Tools.KG_Config import TOPIC from Tools.KG_Config import TOPIC
# 文件路径 # 文件路径
file_path = rf"d:\dsWork\dsProject\dsLightRag\Topic\{TOPIC}\vdb_chunks.json" file_path = rf"d:\dsWork\dsProject\dsLightRag\Topic\{TOPIC}\vdb_chunks.json"
# 读取并解析JSON文件 # 检查文件是否存在
with open(file_path, 'r', encoding='utf-8') as f: if not os.path.exists(file_path):
data = json.load(f) raise FileNotFoundError(f"文件不存在: {file_path}")
# 提取所有块信息
chunks = data.get('data', [])
# 打印块数量和详细信息 # 使用ijson流式读取JSON文件
print(f"共找到 {len(chunks)} 个块:") with open(file_path, 'r', encoding='utf-8') as f:
for i, chunk in enumerate(chunks, 1): # 流式迭代所有块
chunks = ijson.items(f, 'data.item')
chunk_list = list(chunks)
print(f"共找到 {len(chunk_list)} 个块:")
for i, chunk in enumerate(chunk_list, 1):
print(f"{i}:") print(f"{i}:")
print(f"ID: {chunk.get('__id__')}") print(f"ID: {chunk.get('__id__')}")
print(f"创建时间: {chunk.get('__created_at__')}") print(f"创建时间: {chunk.get('__created_at__')}")

@ -1,16 +1,20 @@
import json import ijson
import os
from Tools.KG_Config import TOPIC from Tools.KG_Config import TOPIC
# 文件路径 # 文件路径
file_path = rf"d:\dsWork\dsProject\dsLightRag\Topic\{TOPIC}\kv_store_text_chunks.json" file_path = rf"d:\dsWork\dsProject\dsLightRag\Topic\{TOPIC}\kv_store_text_chunks.json"
# 读取并解析JSON文件 # 检查文件是否存在
with open(file_path, 'r', encoding='utf-8') as f: if not os.path.exists(file_path):
text_chunks = json.load(f) raise FileNotFoundError(f"文件不存在: {file_path}")
# 遍历所有文本块 # 使用ijson流式读取JSON文件
for chunk_id, chunk_info in text_chunks.items(): with open(file_path, 'r', encoding='utf-8') as f:
# 流式迭代所有文本块
print("正在读取文本块...")
for chunk_id, chunk_info in ijson.kvitems(f, ''):
print(f"块ID: {chunk_id}") print(f"块ID: {chunk_id}")
print(f"所属文档ID: {chunk_info['full_doc_id']}") print(f"所属文档ID: {chunk_info['full_doc_id']}")
print(f"块序号: {chunk_info['chunk_order_index']}") print(f"块序号: {chunk_info['chunk_order_index']}")

@ -1,18 +1,22 @@
import json import ijson
import os
from Tools.KG_Config import TOPIC from Tools.KG_Config import TOPIC
# 文件路径 # 文件路径
file_path = rf"d:\dsWork\dsProject\dsLightRag\Topic\{TOPIC}\vdb_entities.json" file_path = rf"d:\dsWork\dsProject\dsLightRag\Topic\{TOPIC}\vdb_entities.json"
# 读取并解析JSON文件 # 检查文件是否存在
with open(file_path, 'r', encoding='utf-8') as f: if not os.path.exists(file_path):
data = json.load(f) raise FileNotFoundError(f"文件不存在: {file_path}")
# 提取所有实体名称
entities = [item['entity_name'] for item in data['data']]
# 打印实体列表 # 使用ijson流式读取JSON文件
print("文件中的实体列表:") with open(file_path, 'r', encoding='utf-8') as f:
for entity in entities: # 流式迭代所有实体名称
print("正在读取实体...")
entity_names = ijson.items(f, 'data.item.entity_name')
entity_list = list(entity_names)
print(f"共找到 {len(entity_list)} 个实体:")
print("文件中的实体列表:")
for entity in entity_list:
print(f"- {entity}") print(f"- {entity}")

@ -1,4 +1,4 @@
import json import ijson
import os import os
from Tools.KG_Config import TOPIC from Tools.KG_Config import TOPIC
@ -9,15 +9,21 @@ def parse_relationships(file_path):
if not os.path.exists(file_path): if not os.path.exists(file_path):
raise FileNotFoundError(f"文件不存在: {file_path}") raise FileNotFoundError(f"文件不存在: {file_path}")
# 读取并解析JSON文件 # 读取embedding_dim
embedding_dim = "未知维度"
with open(file_path, 'r', encoding='utf-8') as f: with open(file_path, 'r', encoding='utf-8') as f:
data = json.load(f) embedding_dim = next(ijson.items(f, 'embedding_dim'), "未知维度")
# 读取关系数据
relationships = []
with open(file_path, 'r', encoding='utf-8') as f:
relationships = list(ijson.items(f, 'data.item'))
# 提取关键信息 # 提取关键信息
result = { result = {
"embedding_dim": data.get("embedding_dim", "未知维度"), "embedding_dim": embedding_dim,
"relationship_count": len(data.get("data", [])), "relationship_count": len(relationships),
"sample_relationships": data.get("data", [])[:3] # 显示前3条示例 "sample_relationships": relationships[:3] # 显示前3条示例
} }
return result return result

Loading…
Cancel
Save