You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

24 lines
864 B

4 days ago
import ijson
import os
5 days ago
4 days ago
from Tools.KG_Config import TOPIC
5 days ago
# Path to the text-chunk store file (kv_store_text_chunks.json) under TOPIC
4 days ago
# Join with os.path.join instead of a literal "\" so the path is not tied to
# the Windows separator; TOPIC (from Tools.KG_Config) is used as the directory.
file_path = os.path.join(TOPIC, "kv_store_text_chunks.json")
5 days ago
4 days ago
# Stop early with an explicit message when nothing exists at file_path.
if not os.path.exists(file_path):
    missing_msg = f"文件不存在: {file_path}"
    raise FileNotFoundError(missing_msg)
5 days ago
4 days ago
# Stream the JSON file with ijson and print a short summary of every entry.
# The file is opened in binary mode: ijson expects a bytes-producing file
# object, and a text-mode handle is only accepted through a deprecated path
# that re-encodes each read back to UTF-8.
with open(file_path, 'rb') as f:
    print("正在读取文本块...")
    # The empty prefix '' selects the top-level JSON object, so kvitems yields
    # one (key, value) pair per top-level member as it is parsed.
    for chunk_id, chunk_info in ijson.kvitems(f, ''):
        print(f"块ID: {chunk_id}")
        print(f"所属文档ID: {chunk_info['full_doc_id']}")
        print(f"块序号: {chunk_info['chunk_order_index']}")
        print(f"Token数量: {chunk_info['tokens']}")
        # Uncomment to preview the first 100 characters of the chunk content.
        # print(f"内容预览: {chunk_info['content'][:100]}...")
        print(f"来源文件: {chunk_info['file_path']}")
        print("---")