'commit'

4 days ago · 1e19c64435
parent dbb7ba2f07
commit 1e19c64435
5 changed files with 113 additions and 0 deletions
--- a/dsLightRag/Tools/T1_GetEntity.py
+++ b/dsLightRag/Tools/T1_GetEntity.py
@@ -0,0 +1,16 @@
+import json
+
+# Path of the vdb_entities.json file whose entity names are listed
+file_path = r"d:\dsWork\dsProject\dsLightRag\Topic\ChuZhongShuXue\vdb_entities.json"
+
+# Read and parse the JSON file
+with open(file_path, 'r', encoding='utf-8') as f:
+    data = json.load(f)
+
+# Collect the entity_name of every record under the top-level "data" key
+entities = [item['entity_name'] for item in data['data']]
+
+# Print the entity names, one per line
+print("文件中的实体列表：")
+for entity in entities:
+    print(f"- {entity}")
--- a/dsLightRag/Tools/T2_GetChunk.py
+++ b/dsLightRag/Tools/T2_GetChunk.py
@@ -0,0 +1,22 @@
+import json
+
+# 文件路径
+file_path = r"d:\dsWork\dsProject\dsLightRag\Topic\ChuZhongShuXue\vdb_chunks.json"
+
+# 读取并解析JSON文件
+with open(file_path, 'r', encoding='utf-8') as f:
+    data = json.load(f)
+
+# 提取所有块信息
+chunks = data.get('data', [])
+
+# 打印块数量和详细信息
+print(f"共找到 {len(chunks)} 个块：")
+for i, chunk in enumerate(chunks, 1):
+    print(f"块 {i}:")
+    print(f"ID: {chunk.get('__id__')}")
+    print(f"创建时间: {chunk.get('__created_at__')}")
+    print(f"文档ID: {chunk.get('full_doc_id')}")
+    print(f"文件路径: {chunk.get('file_path')}")
+    print(f"内容预览: {chunk.get('content', '')}")  # 显示前100字符
+    print("---")
--- a/dsLightRag/Tools/T3_ReadDocStatus.py
+++ b/dsLightRag/Tools/T3_ReadDocStatus.py
@@ -0,0 +1,21 @@
+import json
+import os
+
+# Path of the kv_store_doc_status.json file to inspect
+file_path = r"d:\dsWork\dsProject\dsLightRag\Topic\JiHe\kv_store_doc_status.json"
+
+# Read and parse the JSON file
+with open(file_path, 'r', encoding='utf-8') as f:
+    doc_status_data = json.load(f)
+
+# Print the status and chunk count of each entry; top-level keys are printed as document IDs
+for doc_id, status_info in doc_status_data.items():
+    print(f"文档ID: {doc_id}")
+    print(f"状态: {status_info['status']}")
+    print(f"分块数量: {status_info['chunks_count']}")
+    #print(f"内容摘要: {status_info['content_summary'][:100]}...")  # print the first 100 characters
+    #print(f"内容长度: {status_info['content_length']}字符")
+    #print(f"创建时间: {status_info['created_at']}")
+    #print(f"更新时间: {status_info['updated_at']}")
+    #print(f"文件路径: {status_info['file_path']}")
+    print("---")
--- a/dsLightRag/Tools/T4_ReadTextChunks.py
+++ b/dsLightRag/Tools/T4_ReadTextChunks.py
@@ -0,0 +1,18 @@
+import json
+
+# Path of the kv_store_text_chunks.json file to inspect
+file_path = r"d:\dsWork\dsProject\dsLightRag\Topic\JiHe\kv_store_text_chunks.json"
+
+# Read and parse the JSON file
+with open(file_path, 'r', encoding='utf-8') as f:
+    text_chunks = json.load(f)
+
+# Print the metadata of every text chunk; top-level keys are printed as chunk IDs
+for chunk_id, chunk_info in text_chunks.items():
+    print(f"块ID: {chunk_id}")
+    print(f"所属文档ID: {chunk_info['full_doc_id']}")
+    print(f"块序号: {chunk_info['chunk_order_index']}")
+    print(f"Token数量: {chunk_info['tokens']}")
+    #print(f"内容预览: {chunk_info['content'][:100]}...")  # print the first 100 characters
+    print(f"来源文件: {chunk_info['file_path']}")
+    print("---")
--- a/dsLightRag/Tools/T5_ReadRelationships.py
+++ b/dsLightRag/Tools/T5_ReadRelationships.py
@@ -0,0 +1,36 @@
+import json
+import os
+
+def parse_relationships(file_path):
+    # Fail early with FileNotFoundError when the given file does not exist
+    if not os.path.exists(file_path):
+        raise FileNotFoundError(f"文件不存在: {file_path}")
+
+    # Read and parse the JSON file (decoded as UTF-8)
+    with open(file_path, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+
+    # Build the summary dict that is returned; absent keys fall back to the defaults below
+    result = {
+        "embedding_dim": data.get("embedding_dim", "未知维度"),
+        "relationship_count": len(data.get("data", [])),
+        "sample_relationships": data.get("data", [])[:3]  # at most the first 3 records, as samples
+    }
+    return result
+
+if __name__ == "__main__":
+    file_path = r"d:\dsWork\dsProject\dsLightRag\Topic\JiHe\vdb_relationships.json"
+    try:
+        relationships = parse_relationships(file_path)
+        print(f"嵌入维度: {relationships['embedding_dim']}")
+        print(f"关系总数: {relationships['relationship_count']}")
+        print("\n示例关系:\n")
+        for i, rel in enumerate(relationships['sample_relationships'], 1):
+            print(f"关系 {i}:")
+            print(f"  ID: {rel['__id__']}")
+            print(f"  源实体: {rel['src_id']}")
+            print(f"  目标实体: {rel['tgt_id']}")
+            print(f"  关系描述: {rel['content'][:50]}...")  # truncate long text to its first 50 characters
+            print(f"  来源块ID: {rel['source_id']}\n")
+    except Exception as e:  # broad catch: a missing file, invalid JSON, or a missing record key is all reported the same way
+        print(f"解析错误: {str(e)}")