diff --git a/dsLightRag/Tools/T1_GetEntity.py b/dsLightRag/Tools/T1_GetEntity.py
new file mode 100644
index 00000000..5fe6d142
--- /dev/null
+++ b/dsLightRag/Tools/T1_GetEntity.py
@@ -0,0 +1,24 @@
+"""List the entity names found in a ``vdb_entities.json`` file.
+
+Usage: python T1_GetEntity.py [path/to/vdb_entities.json]
+"""
+import json
+import sys
+
+# Path used when no command-line argument is given.
+DEFAULT_FILE_PATH = r"d:\dsWork\dsProject\dsLightRag\Topic\ChuZhongShuXue\vdb_entities.json"
+
+# An optional first command-line argument overrides the default path.
+file_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_FILE_PATH
+
+# Read and parse the JSON file.
+with open(file_path, 'r', encoding='utf-8') as f:
+    data = json.load(f)
+
+# Collect every entity name from the top-level "data" list.
+entities = [item['entity_name'] for item in data['data']]
+
+# Print the entity list.
+print("文件中的实体列表:")
+for entity in entities:
+    print(f"- {entity}")
diff --git a/dsLightRag/Tools/T2_GetChunk.py b/dsLightRag/Tools/T2_GetChunk.py
new file mode 100644
index 00000000..c28aac13
--- /dev/null
+++ b/dsLightRag/Tools/T2_GetChunk.py
@@ -0,0 +1,34 @@
+"""Print the chunk records found in a ``vdb_chunks.json`` file.
+
+Usage: python T2_GetChunk.py [path/to/vdb_chunks.json]
+"""
+import json
+import sys
+
+# Path used when no command-line argument is given.
+DEFAULT_FILE_PATH = r"d:\dsWork\dsProject\dsLightRag\Topic\ChuZhongShuXue\vdb_chunks.json"
+
+# Number of leading characters of each chunk shown as its preview.
+PREVIEW_LENGTH = 100
+
+# An optional first command-line argument overrides the default path.
+file_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_FILE_PATH
+
+# Read and parse the JSON file.
+with open(file_path, 'r', encoding='utf-8') as f:
+    data = json.load(f)
+
+# Extract all chunk records (an empty list when "data" is absent).
+chunks = data.get('data', [])
+
+# Print the chunk count followed by the details of each chunk.
+print(f"共找到 {len(chunks)} 个块:")
+for i, chunk in enumerate(chunks, 1):
+    print(f"块 {i}:")
+    print(f"ID: {chunk.get('__id__')}")
+    print(f"创建时间: {chunk.get('__created_at__')}")
+    print(f"文档ID: {chunk.get('full_doc_id')}")
+    print(f"文件路径: {chunk.get('file_path')}")
+    # Show only the first PREVIEW_LENGTH characters so this stays a preview.
+    print(f"内容预览: {(chunk.get('content') or '')[:PREVIEW_LENGTH]}")
+    print("---")
diff --git a/dsLightRag/Tools/T3_ReadDocStatus.py b/dsLightRag/Tools/T3_ReadDocStatus.py
new file mode 100644
index 00000000..6a47143e
--- /dev/null
+++ b/dsLightRag/Tools/T3_ReadDocStatus.py
@@ -0,0 +1,30 @@
+"""Print each document's status from a ``kv_store_doc_status.json`` file.
+
+Usage: python T3_ReadDocStatus.py [path/to/kv_store_doc_status.json]
+"""
+import json
+import os
+import sys
+
+# Path used when no command-line argument is given.
+DEFAULT_FILE_PATH = r"d:\dsWork\dsProject\dsLightRag\Topic\JiHe\kv_store_doc_status.json"
+
+# An optional first command-line argument overrides the default path.
+file_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_FILE_PATH
+
+# Read and parse the JSON file.
+with open(file_path, 'r', encoding='utf-8') as f:
+    doc_status_data = json.load(f)
+
+# Walk the per-document status records, keyed by document ID.
+for doc_id, status_info in doc_status_data.items():
+    print(f"文档ID: {doc_id}")
+    print(f"状态: {status_info['status']}")
+    print(f"分块数量: {status_info['chunks_count']}")
+    # Further fields that can be printed when needed:
+    #print(f"内容摘要: {status_info['content_summary'][:100]}...")  # first 100 characters
+    #print(f"内容长度: {status_info['content_length']}字符")
+    #print(f"创建时间: {status_info['created_at']}")
+    #print(f"更新时间: {status_info['updated_at']}")
+    #print(f"文件路径: {status_info['file_path']}")
+    print("---")
diff --git a/dsLightRag/Tools/T4_ReadTextChunks.py b/dsLightRag/Tools/T4_ReadTextChunks.py
new file mode 100644
index 00000000..09e38dea
--- /dev/null
+++ b/dsLightRag/Tools/T4_ReadTextChunks.py
@@ -0,0 +1,26 @@
+"""Print metadata for every text chunk in a ``kv_store_text_chunks.json`` file.
+
+Usage: python T4_ReadTextChunks.py [path/to/kv_store_text_chunks.json]
+"""
+import json
+import sys
+
+# Path used when no command-line argument is given.
+DEFAULT_FILE_PATH = r"d:\dsWork\dsProject\dsLightRag\Topic\JiHe\kv_store_text_chunks.json"
+
+# An optional first command-line argument overrides the default path.
+file_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_FILE_PATH
+
+# Read and parse the JSON file.
+with open(file_path, 'r', encoding='utf-8') as f:
+    text_chunks = json.load(f)
+
+# Walk all text chunks, keyed by chunk ID.
+for chunk_id, chunk_info in text_chunks.items():
+    print(f"块ID: {chunk_id}")
+    print(f"所属文档ID: {chunk_info['full_doc_id']}")
+    print(f"块序号: {chunk_info['chunk_order_index']}")
+    print(f"Token数量: {chunk_info['tokens']}")
+    #print(f"内容预览: {chunk_info['content'][:100]}...")  # first 100 characters
+    print(f"来源文件: {chunk_info['file_path']}")
+    print("---")
diff --git a/dsLightRag/Tools/T5_ReadRelationships.py b/dsLightRag/Tools/T5_ReadRelationships.py
new file mode 100644
index 00000000..95c9a1fc
--- /dev/null
+++ b/dsLightRag/Tools/T5_ReadRelationships.py
@@ -0,0 +1,70 @@
+"""Summarise the relationship records in a ``vdb_relationships.json`` file.
+
+Usage: python T5_ReadRelationships.py [path/to/vdb_relationships.json]
+"""
+import json
+import os
+import sys
+
+# Path used when no command-line argument is given.
+DEFAULT_FILE_PATH = r"d:\dsWork\dsProject\dsLightRag\Topic\JiHe\vdb_relationships.json"
+
+# Maximum number of description characters printed per relationship.
+DESCRIPTION_PREVIEW_LENGTH = 50
+
+
+def parse_relationships(file_path):
+    """Load a relationships JSON file and return a short summary of it.
+
+    Args:
+        file_path: Path of the UTF-8 encoded JSON file to read.
+
+    Returns:
+        A dict with three keys: ``embedding_dim`` (the value stored in the
+        file, or a placeholder string when the key is absent),
+        ``relationship_count`` (length of the ``data`` list) and
+        ``sample_relationships`` (the first three entries of that list).
+
+    Raises:
+        FileNotFoundError: If ``file_path`` does not exist.
+        json.JSONDecodeError: If the file content is not valid JSON.
+    """
+    # Fail early with a message that names the missing path.
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"文件不存在: {file_path}")
+
+    # Read and parse the JSON file.
+    with open(file_path, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+
+    # Look the list up once; a missing "data" key counts as no relationships.
+    records = data.get("data", [])
+    return {
+        "embedding_dim": data.get("embedding_dim", "未知维度"),
+        "relationship_count": len(records),
+        "sample_relationships": records[:3],  # first three as examples
+    }
+
+
+if __name__ == "__main__":
+    # An optional first command-line argument overrides the default path.
+    file_path = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_FILE_PATH
+    try:
+        relationships = parse_relationships(file_path)
+        print(f"嵌入维度: {relationships['embedding_dim']}")
+        print(f"关系总数: {relationships['relationship_count']}")
+        print("\n示例关系:\n")
+        for i, rel in enumerate(relationships['sample_relationships'], 1):
+            print(f"关系 {i}:")
+            print(f" ID: {rel['__id__']}")
+            print(f" 源实体: {rel['src_id']}")
+            print(f" 目标实体: {rel['tgt_id']}")
+            # Append the ellipsis only when the text was actually cut.
+            description = rel['content']
+            if len(description) > DESCRIPTION_PREVIEW_LENGTH:
+                description = description[:DESCRIPTION_PREVIEW_LENGTH] + "..."
+            print(f" 关系描述: {description}")
+            print(f" 来源块ID: {rel['source_id']}\n")
+    except Exception as e:
+        # Top-level boundary of a command-line tool: report the error and stop.
+        print(f"解析错误: {e}")