diff --git a/dsLightRag/Doc/8、图数据库DozerDb.md b/dsLightRag/Doc/8、图数据库DozerDb.md index b3f3f6e6..e01e23c5 100644 --- a/dsLightRag/Doc/8、图数据库DozerDb.md +++ b/dsLightRag/Doc/8、图数据库DozerDb.md @@ -1 +1,3 @@ -https://dozerdb.org/ \ No newline at end of file +https://dozerdb.org/ + +https://milvus.io/ \ No newline at end of file diff --git a/dsLightRag/Doc/MathType/MathType-win-zh-7.8.2.441.exe b/dsLightRag/Doc/MathType/MathType-win-zh-7.8.2.441.exe deleted file mode 100644 index 8bd59d2c..00000000 Binary files a/dsLightRag/Doc/MathType/MathType-win-zh-7.8.2.441.exe and /dev/null differ diff --git a/dsLightRag/Doc/MathType/MathType_v7.x_Patch.exe b/dsLightRag/Doc/MathType/MathType_v7.x_Patch.exe deleted file mode 100644 index e3f0de7d..00000000 Binary files a/dsLightRag/Doc/MathType/MathType_v7.x_Patch.exe and /dev/null differ diff --git a/dsLightRag/Doc/MathType/录入化学方程式的办法.md b/dsLightRag/Doc/MathType/录入化学方程式的办法.md deleted file mode 100644 index e421effc..00000000 --- a/dsLightRag/Doc/MathType/录入化学方程式的办法.md +++ /dev/null @@ -1,33 +0,0 @@ -### 一、安装$MathType$ $7.8 $ - -由于Word中的公式编辑器在编辑数学、化学、生物等公式时,并不是特别方便,所以可以采用使用插件$MathType$的方式来快速录入$Word$中的公式。 - -安装、破解、使用,不再赘述。 - - - -### 二、手动调整公式格式 - -但由于格式的差别,其它软件并不能正确读取$MathType$制作的公式,为了让其它软件也能正确读取$Word$中$MathType$制作的公式,还需要一些手动的调整办法。 - -![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104228950.png) - -![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104322174.png) - -- 鼠标左键点击,然后Ctrl+C 复制到内存 - -![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104444709.png) - -- Ctrl+V 即可 - -![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104536077.png) - -### 三、测试一下 - -````cmd -pandoc -f docx -t markdown --extract-media ./images -o c:/1.md D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx -```` -MathType 输入空格 | 多行公式对齐 | 标志符号垂直对齐 | 大括号分段函数对齐 | 方程组对齐 | 矩阵对齐 | 公式编号对齐 | 花体字 空心字 | 论文排版 -https://blog.csdn.net/u013669912/article/details/142323661 - - diff --git a/dsLightRag/Doc/MathType/激活MathType.docx b/dsLightRag/Doc/MathType/激活MathType.docx deleted file mode 100644 index 43ed5fa8..00000000 Binary files a/dsLightRag/Doc/MathType/激活MathType.docx and /dev/null differ diff --git a/dsLightRag/T3_TrainNeo4j.py b/dsLightRag/T3_TrainNeo4j.py deleted file mode 100644 index b354b83a..00000000 --- a/dsLightRag/T3_TrainNeo4j.py +++ /dev/null @@ -1,64 +0,0 @@ -import asyncio - -from Config.Config import NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD -from Util.DocxUtil import get_docx_content_by_pandoc -from Util.LightRagUtil import configure_logging, initialize_rag -import os - -from Util.Neo4jExecutor import Neo4jExecutor - -# 数学 -KEMU = 'Chemistry' # Chinese,Math,Chemistry - -# 组装文件路径 -WORKING_DIR = "./Topic/" + KEMU - -# 文档文件 -docx_file = 'static/Txt/' + KEMU + '.docx' - -async def main(): - # 设置Neo4j连接参数 - os.environ["NEO4J_URI"] = NEO4J_URI - os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME - os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD - - # 清库 【视情况看看是不是需要注释掉】 - executor = Neo4jExecutor.create_default() - executor.graph.run("MATCH (n) DETACH DELETE n") - print("清库成功") - - # 注释掉或删除以下清理代码 - # files_to_delete = [ - # "graph_chunk_entity_relation.graphml", - # "kv_store_doc_status.json", - # "kv_store_full_docs.json", - # "kv_store_text_chunks.json", - # "vdb_chunks.json", - # "vdb_entities.json", - # "vdb_relationships.json", - # ] - - # 获取docx文件的内容 - content = get_docx_content_by_pandoc(docx_file) - - # 删除文件 - # for file in files_to_delete: - # file_path = os.path.join(WORKING_DIR, file) - # if os.path.exists(file_path): - # os.remove(file_path) - # print(f"Deleting old file:: {file_path}") - - try: - # 注意:默认设置使用NetworkX,使用Neo4J实现初始化LightRAG - rag = await initialize_rag(WORKING_DIR, graph_storage="Neo4JStorage") - await rag.ainsert(content) - print("\nIndexing completed successfully!") - except Exception as e: - print(f"An error occurred: {e}") - finally: - await rag.finalize_storages() - - -if __name__ == "__main__": - configure_logging() - asyncio.run(main()) diff --git a/dsLightRag/T4_QueryNeo4j.py b/dsLightRag/T4_QueryNeo4j.py deleted file mode 100644 index 62e8e9d5..00000000 --- a/dsLightRag/T4_QueryNeo4j.py +++ /dev/null @@ -1,52 +0,0 @@ -import asyncio -import inspect -import os - -from Config.Config import NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD -from Util.LightRagUtil import configure_logging, initialize_rag, print_stream -from lightrag import QueryParam - -# 化学 -data = [ - # {"NAME": "Chemistry", "Q": "硝酸光照分解的化学反应方程式是什么", "ChineseName": "化学"}, - {"NAME": "Chemistry", "Q": "氢气与氧气燃烧的现象", "ChineseName": "化学"}, - {"NAME": "Math", "Q": "氧化铁与硝酸的化学反应方程式是什么", "ChineseName": "数学"}, - {"NAME": "Chinese", "Q": "氧化铁与硝酸的化学反应方程式是什么", "ChineseName": "语文"}] - -# 准备查询的科目 -KEMU = "Chemistry" - -# 查找索引号 -idx = [i for i, d in enumerate(data) if d["NAME"] == KEMU][0] - - -async def main(): - # 设置Neo4j连接参数 - os.environ["NEO4J_URI"] = NEO4J_URI - os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME - os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD - - try: - user_prompt = "\n 1、资料中提供化学反应方程式的,一定要严格按提供的Latex公式输出,绝对不允许对Latex公式进行修改 !" - user_prompt = user_prompt + "\n 2、如果资料中提供了图片的,一定要严格按照原文提供图片输出,不允许省略或不输出!" - user_prompt = user_prompt + "\n 3、资料中提到的知识内容,需要判断是否与本次问题相关,不相关的绝对不要输出!" - rag = await initialize_rag('Topic/' + data[idx]["NAME"],graph_storage="Neo4JStorage") # 加上使用Neo4JStorage - resp = await rag.aquery( - data[idx]["Q"], - param=QueryParam(mode="hybrid", stream=True, user_prompt=user_prompt), - # hybrid naive - ) - if inspect.isasyncgen(resp): - await print_stream(resp) - else: - print(resp) - except Exception as e: - print(f"An error occurred: {e}") - finally: - if rag: - await rag.finalize_storages() - - -if __name__ == "__main__": - configure_logging() - asyncio.run(main()) diff --git a/dsLightRag/lightrag_openai_neo4j_milvus_redis_demo.py b/dsLightRag/lightrag_openai_neo4j_milvus_redis_demo.py deleted file mode 100644 index d5920e60..00000000 --- a/dsLightRag/lightrag_openai_neo4j_milvus_redis_demo.py +++ /dev/null @@ -1,95 +0,0 @@ -import os -import asyncio -from lightrag import LightRAG, QueryParam -from lightrag.utils import EmbeddingFunc -from lightrag.kg.shared_storage import initialize_pipeline_status - -from Config.Config import EMBED_DIM, EMBED_MAX_TOKEN_SIZE, NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD -from Util.LightRagUtil import llm_model_func, embedding_func - -# WorkingDir -ROOT_DIR = os.path.dirname(os.path.abspath(__file__)) -WORKING_DIR = os.path.join(ROOT_DIR, "myKG") -if not os.path.exists(WORKING_DIR): - os.mkdir(WORKING_DIR) -print(f"WorkingDir: {WORKING_DIR}") - -# redis -os.environ["REDIS_URI"] = "redis://localhost:6379" - -# neo4j -BATCH_SIZE_NODES = 500 -BATCH_SIZE_EDGES = 100 -os.environ["NEO4J_URI"] = NEO4J_URI -os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME -os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD - -# milvus -os.environ["MILVUS_URI"] = "http://localhost:19530" -os.environ["MILVUS_USER"] = "root" -os.environ["MILVUS_PASSWORD"] = "Milvus" -os.environ["MILVUS_DB_NAME"] = "lightrag" - - -async def initialize_rag(): - rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=llm_model_func, - llm_model_max_token_size=32768, - embedding_func=EmbeddingFunc( - embedding_dim=EMBED_DIM, - max_token_size=EMBED_MAX_TOKEN_SIZE, - func=embedding_func - ), - chunk_token_size=512, - chunk_overlap_token_size=256, - kv_storage="RedisKVStorage", - graph_storage="Neo4JStorage", - vector_storage="MilvusVectorDBStorage", - doc_status_storage="RedisKVStorage", - ) - - await rag.initialize_storages() - await initialize_pipeline_status() - - return rag - - -def main(): - # Initialize RAG instance - rag = asyncio.run(initialize_rag()) - - with open("./book.txt", "r", encoding="utf-8") as f: - rag.insert(f.read()) - - # Perform naive search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="naive") - ) - ) - - # Perform local search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="local") - ) - ) - - # Perform global search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="global") - ) - ) - - # Perform hybrid search - print( - rag.query( - "What are the top themes in this story?", param=QueryParam(mode="hybrid") - ) - ) - - -if __name__ == "__main__": - main()