'commit'

2 weeks ago · 0c7d934f31
parent 852b0540c2
commit 0c7d934f31
8 changed files with 3 additions and 245 deletions
--- a/dsLightRag/Doc/8、图数据库DozerDb.md
+++ b/dsLightRag/Doc/8、图数据库DozerDb.md
@ -1 +1,3 @@
-https://dozerdb.org/
+https://dozerdb.org/
+
+https://milvus.io/
--- a/dsLightRag/Doc/MathType/MathType-win-zh-7.8.2.441.exe
+++ b/dsLightRag/Doc/MathType/MathType-win-zh-7.8.2.441.exe
--- a/dsLightRag/Doc/MathType/MathType_v7.x_Patch.exe
+++ b/dsLightRag/Doc/MathType/MathType_v7.x_Patch.exe
--- a/dsLightRag/Doc/MathType/录入化学方程式的办法.md
+++ b/dsLightRag/Doc/MathType/录入化学方程式的办法.md
@ -1,33 +0,0 @@
-### 一、安装 $MathType$ $7.8$
-
-由于 $Word$ 自带的公式编辑器在编辑数学、化学、生物等公式时并不是特别方便，所以可以借助插件 $MathType$ 来快速录入 $Word$ 中的公式。
-
-安装、破解、使用，不再赘述。
-
-
-
-### 二、手动调整公式格式 
-
-但由于格式的差别，其它软件并不能正确读取$MathType$制作的公式，为了让其它软件也能正确读取$Word$中$MathType$制作的公式，还需要一些手动的调整办法。
-
-![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104228950.png)
-
-![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104322174.png)
-
- 鼠标左键点击，然后按 Ctrl+C 复制到剪贴板
-
-![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104444709.png)
-
- Ctrl+V 即可
-
-![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104536077.png)
-
-### 三、测试一下
-
-````cmd
-pandoc -f docx -t markdown --extract-media ./images -o c:/1.md D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx
-````
-MathType 输入空格 | 多行公式对齐 | 标志符号垂直对齐 | 大括号分段函数对齐 | 方程组对齐 | 矩阵对齐 | 公式编号对齐 | 花体字 空心字 | 论文排版
-https://blog.csdn.net/u013669912/article/details/142323661
-
-
--- a/dsLightRag/Doc/MathType/激活MathType.docx
+++ b/dsLightRag/Doc/MathType/激活MathType.docx
--- a/dsLightRag/T3_TrainNeo4j.py
+++ b/dsLightRag/T3_TrainNeo4j.py
@ -1,64 +0,0 @@
-import asyncio
-
-from Config.Config import NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD
-from Util.DocxUtil import get_docx_content_by_pandoc
-from Util.LightRagUtil import configure_logging, initialize_rag
-import os
-
-from Util.Neo4jExecutor import Neo4jExecutor
-
-# Subject to index; one of: Chinese, Math, Chemistry.
-KEMU = 'Chemistry'
-
-# Working directory handed to initialize_rag() for the selected subject.
-WORKING_DIR = f"./Topic/{KEMU}"
-
-# Source .docx document whose content is indexed for the selected subject.
-docx_file = f"static/Txt/{KEMU}.docx"
-
-async def main():
-    """Wipe the Neo4j graph, then index ``docx_file`` into LightRAG.
-
-    Steps:
-      1. export the Neo4j connection settings as environment variables;
-      2. run ``MATCH (n) DETACH DELETE n`` through the default
-         ``Neo4jExecutor`` (removes every node together with its edges);
-      3. extract the document text with ``get_docx_content_by_pandoc``;
-      4. insert the text into a LightRAG instance that uses ``Neo4JStorage``.
-
-    Errors raised while initializing or indexing are printed, not re-raised;
-    storages are finalized only when initialization succeeded.
-
-    Removal of the local store files under ``WORKING_DIR``
-    (graph_chunk_entity_relation.graphml, kv_store_*.json, vdb_*.json) was
-    commented out in this script and is therefore not performed.
-    """
-    # Neo4j connection parameters, exported as environment variables.
-    os.environ["NEO4J_URI"] = NEO4J_URI
-    os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME
-    os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD
-
-    # DESTRUCTIVE: clears the whole graph. Comment out to keep existing data.
-    executor = Neo4jExecutor.create_default()
-    executor.graph.run("MATCH (n) DETACH DELETE n")
-    print("清库成功")
-
-    # Read the content of the docx file.
-    content = get_docx_content_by_pandoc(docx_file)
-
-    # Bind ``rag`` before the ``try`` so that ``finally`` can tell whether
-    # initialization succeeded. Previously a failure inside initialize_rag()
-    # left ``rag`` unbound and ``finally`` raised UnboundLocalError, hiding
-    # the error that had just been reported.
-    rag = None
-    try:
-        # Per the original note, the default graph storage is NetworkX;
-        # Neo4j is selected explicitly here.
-        rag = await initialize_rag(WORKING_DIR, graph_storage="Neo4JStorage")
-        await rag.ainsert(content)
-        print("\nIndexing completed successfully!")
-    except Exception as e:
-        print(f"An error occurred: {e}")
-    finally:
-        if rag is not None:
-            await rag.finalize_storages()
-
-
-# Script entry point: call configure_logging(), then drive main() with asyncio.run().
-if __name__ == "__main__":
-    configure_logging()
-    asyncio.run(main())
--- a/dsLightRag/T4_QueryNeo4j.py
+++ b/dsLightRag/T4_QueryNeo4j.py
@ -1,52 +0,0 @@
-import asyncio
-import inspect
-import os
-
-from Config.Config import NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD
-from Util.LightRagUtil import configure_logging, initialize_rag, print_stream
-from lightrag import QueryParam
-
-# Sample question per subject; the first entry whose "NAME" equals KEMU is used.
-# NOTE(review): the Math and Chinese entries reuse a chemistry question --
-# presumably placeholders; confirm.
-data = [
-    # {"NAME": "Chemistry", "Q": "硝酸光照分解的化学反应方程式是什么", "ChineseName": "化学"},
-    {"NAME": "Chemistry", "Q": "氢气与氧气燃烧的现象", "ChineseName": "化学"},
-    {"NAME": "Math", "Q": "氧化铁与硝酸的化学反应方程式是什么", "ChineseName": "数学"},
-    {"NAME": "Chinese", "Q": "氧化铁与硝酸的化学反应方程式是什么", "ChineseName": "语文"}]
-
-# Subject to query.
-KEMU = "Chemistry"
-
-# Index of the subject's entry. Stop at the first match and fail with a
-# descriptive message instead of a bare "list index out of range".
-idx = next((i for i, d in enumerate(data) if d["NAME"] == KEMU), None)
-if idx is None:
-    raise IndexError(f"No entry in data with NAME == {KEMU!r}")
-
-
-async def main():
-    """Query the LightRAG store of the selected subject and print the answer.
-
-    The question and store come from ``data[idx]``. The response is printed
-    via ``print_stream`` when it is an async generator, otherwise with
-    ``print``. Errors are printed, not re-raised; storages are finalized
-    only when initialization succeeded.
-    """
-    # Neo4j connection parameters, exported as environment variables.
-    os.environ["NEO4J_URI"] = NEO4J_URI
-    os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME
-    os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD
-
-    # Extra instructions passed to the query as ``user_prompt``.
-    user_prompt = (
-        "\n 1、资料中提供化学反应方程式的，一定要严格按提供的Latex公式输出，绝对不允许对Latex公式进行修改 ！"
-        "\n 2、如果资料中提供了图片的，一定要严格按照原文提供图片输出，不允许省略或不输出！"
-        "\n 3、资料中提到的知识内容，需要判断是否与本次问题相关，不相关的绝对不要输出！"
-    )
-
-    # Bind ``rag`` before the ``try``: the old ``if rag:`` check in ``finally``
-    # raised UnboundLocalError whenever initialize_rag() itself failed.
-    rag = None
-    try:
-        # Use Neo4JStorage as the graph backend.
-        rag = await initialize_rag('Topic/' + data[idx]["NAME"], graph_storage="Neo4JStorage")
-        # The original note lists "hybrid" and "naive" as modes to try here.
-        query_param = QueryParam(mode="hybrid", stream=True, user_prompt=user_prompt)
-        resp = await rag.aquery(data[idx]["Q"], param=query_param)
-        if inspect.isasyncgen(resp):
-            await print_stream(resp)
-        else:
-            print(resp)
-    except Exception as e:
-        print(f"An error occurred: {e}")
-    finally:
-        if rag is not None:
-            await rag.finalize_storages()
-
-
-# Script entry point: call configure_logging(), then drive main() with asyncio.run().
-if __name__ == "__main__":
-    configure_logging()
-    asyncio.run(main())
--- a/dsLightRag/lightrag_openai_neo4j_milvus_redis_demo.py
+++ b/dsLightRag/lightrag_openai_neo4j_milvus_redis_demo.py
@ -1,95 +0,0 @@
-import os
-import asyncio
-from lightrag import LightRAG, QueryParam
-from lightrag.utils import EmbeddingFunc
-from lightrag.kg.shared_storage import initialize_pipeline_status
-
-from Config.Config import EMBED_DIM, EMBED_MAX_TOKEN_SIZE, NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD
-from Util.LightRagUtil import llm_model_func, embedding_func
-
-# Working directory: "myKG" next to this script, created if it is missing.
-ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
-WORKING_DIR = os.path.join(ROOT_DIR, "myKG")
-# exist_ok=True replaces the exists()/mkdir() pair, which could race with
-# another process creating the directory between the check and the call.
-os.makedirs(WORKING_DIR, exist_ok=True)
-print(f"WorkingDir: {WORKING_DIR}")
-
-# Redis endpoint (this script selects RedisKVStorage for KV and doc status).
-os.environ["REDIS_URI"] = "redis://localhost:6379"
-
-# Neo4j (graph storage); connection settings come from Config.Config.
-# NOTE(review): the two batch sizes are not referenced in the visible code.
-BATCH_SIZE_NODES = 500
-BATCH_SIZE_EDGES = 100
-os.environ["NEO4J_URI"] = NEO4J_URI
-os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME
-os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD
-
-# Milvus (vector storage).
-# NOTE(review): credentials are hard-coded here -- acceptable for a local
-# demo only; move them to configuration before any shared use.
-os.environ["MILVUS_URI"] = "http://localhost:19530"
-os.environ["MILVUS_USER"] = "root"
-os.environ["MILVUS_PASSWORD"] = "Milvus"
-os.environ["MILVUS_DB_NAME"] = "lightrag"
-
-
-async def initialize_rag():
-    """Create a LightRAG instance backed by Redis, Neo4j and Milvus.
-
-    Returns the instance after its storages and the shared pipeline status
-    have been initialized.
-    """
-    # Embedding wrapper: dimension and token limit come from Config.Config.
-    embedder = EmbeddingFunc(
-        embedding_dim=EMBED_DIM,
-        max_token_size=EMBED_MAX_TOKEN_SIZE,
-        func=embedding_func,
-    )
-
-    instance = LightRAG(
-        working_dir=WORKING_DIR,
-        # Model and chunking parameters.
-        llm_model_func=llm_model_func,
-        llm_model_max_token_size=32768,
-        embedding_func=embedder,
-        chunk_token_size=512,
-        chunk_overlap_token_size=256,
-        # Storage backends.
-        kv_storage="RedisKVStorage",
-        doc_status_storage="RedisKVStorage",
-        graph_storage="Neo4JStorage",
-        vector_storage="MilvusVectorDBStorage",
-    )
-
-    await instance.initialize_storages()
-    await initialize_pipeline_status()
-    return instance
-
-
-def main():
-    """Index ``./book.txt`` and answer one question in every query mode.
-
-    The same question is sent with the ``naive``, ``local``, ``global`` and
-    ``hybrid`` modes, in that order, and each answer is printed.
-    """
-    question = "What are the top themes in this story?"
-
-    # Create and initialize the RAG instance.
-    rag = asyncio.run(initialize_rag())
-
-    # Index the book.
-    with open("./book.txt", "r", encoding="utf-8") as book:
-        rag.insert(book.read())
-
-    # Run the naive, local, global and hybrid searches in turn.
-    for mode in ("naive", "local", "global", "hybrid"):
-        print(rag.query(question, param=QueryParam(mode=mode)))
-
-
-# Script entry point: run main() only when this file is executed directly.
-if __name__ == "__main__":
-    main()