main
HuangHai 2 weeks ago
parent 852b0540c2
commit 0c7d934f31

@ -1 +1,3 @@
https://dozerdb.org/
https://dozerdb.org/
https://milvus.io/

@ -1,33 +0,0 @@
### 一、安装$MathType$ $7.8 $
由于$Word$自带的公式编辑器在编辑数学、化学、生物等公式时并不是特别方便,所以可以采用插件$MathType$来快速录入$Word$中的公式。
安装、破解、使用,不再赘述。
### 二、手动调整公式格式
但由于格式的差别,其它软件并不能正确读取$MathType$制作的公式,为了让其它软件也能正确读取$Word$中$MathType$制作的公式,还需要一些手动的调整办法。
![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104228950.png)
![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104322174.png)
- 鼠标左键点击,然后按 Ctrl+C 复制到剪贴板
![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104444709.png)
- Ctrl+V 即可
![](https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/BlogImages/%7Byear%7D/%7Bmonth%7D/%7Bmd5%7D.%7BextName%7D/20250630104536077.png)
### 三、测试一下
````cmd
pandoc -f docx -t markdown --extract-media ./images -o c:/1.md D:\dsWork\dsProject\dsRag\Test\化学方程式_CHEMISTRY_1.docx
````
MathType 输入空格 | 多行公式对齐 | 标志符号垂直对齐 | 大括号分段函数对齐 | 方程组对齐 | 矩阵对齐 | 公式编号对齐 | 花体字 空心字 | 论文排版
https://blog.csdn.net/u013669912/article/details/142323661

@ -1,64 +0,0 @@
import asyncio
from Config.Config import NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD
from Util.DocxUtil import get_docx_content_by_pandoc
from Util.LightRagUtil import configure_logging, initialize_rag
import os
from Util.Neo4jExecutor import Neo4jExecutor
# Subject to index; one of: Chinese, Math, Chemistry.
KEMU = 'Chemistry'
# LightRAG working directory for the selected subject.
WORKING_DIR = f"./Topic/{KEMU}"
# Source document (.docx) for the selected subject.
docx_file = f"static/Txt/{KEMU}.docx"
async def main():
    """Rebuild the knowledge graph for the subject selected by KEMU.

    Publishes the Neo4j connection settings as environment variables,
    deletes every node in the Neo4j database, reads the subject's .docx
    file and inserts its content into a LightRAG instance that uses
    Neo4JStorage as graph storage. Errors raised while initializing or
    inserting are printed, not re-raised.
    """
    # Publish the Neo4j connection settings as environment variables.
    os.environ["NEO4J_URI"] = NEO4J_URI
    os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME
    os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD

    # Wipe the whole graph. Destructive: comment this out when the existing
    # data has to be kept.
    executor = Neo4jExecutor.create_default()
    executor.graph.run("MATCH (n) DETACH DELETE n")
    print("清库成功")

    # NOTE: removal of the local LightRAG files under WORKING_DIR
    # (graph_chunk_entity_relation.graphml, kv_store_doc_status.json,
    # kv_store_full_docs.json, kv_store_text_chunks.json, vdb_chunks.json,
    # vdb_entities.json, vdb_relationships.json) is intentionally disabled.

    # Read the content of the docx file.
    content = get_docx_content_by_pandoc(docx_file)

    # Bind rag before the try block: if initialize_rag() raises, the finally
    # clause must not fail with UnboundLocalError and hide the real error.
    rag = None
    try:
        # The default setup uses NetworkX; initialize LightRAG with the
        # Neo4j graph storage instead.
        rag = await initialize_rag(WORKING_DIR, graph_storage="Neo4JStorage")
        await rag.ainsert(content)
        print("\nIndexing completed successfully!")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Only finalize storages that were actually created.
        if rag is not None:
            await rag.finalize_storages()


if __name__ == "__main__":
    configure_logging()
    asyncio.run(main())

@ -1,52 +0,0 @@
import asyncio
import inspect
import os
from Config.Config import NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD
from Util.LightRagUtil import configure_logging, initialize_rag, print_stream
from lightrag import QueryParam
# Sample questions, one entry per subject ("NAME" is also the folder name
# under Topic/).
# Alternative Chemistry question (disabled):
# {"NAME": "Chemistry", "Q": "硝酸光照分解的化学反应方程式是什么", "ChineseName": "化学"},
data = [
    {
        "NAME": "Chemistry",
        "Q": "氢气与氧气燃烧的现象",
        "ChineseName": "化学",
    },
    {
        "NAME": "Math",
        "Q": "氧化铁与硝酸的化学反应方程式是什么",
        "ChineseName": "数学",
    },
    {
        "NAME": "Chinese",
        "Q": "氧化铁与硝酸的化学反应方程式是什么",
        "ChineseName": "语文",
    },
]

# Subject to query.
KEMU = "Chemistry"

# Position of the first entry whose NAME equals KEMU (IndexError when no
# entry matches).
idx = [position for position, entry in enumerate(data) if entry["NAME"] == KEMU][0]
async def main():
    """Run the sample question for the subject selected by KEMU.

    Publishes the Neo4j connection settings as environment variables,
    opens the LightRAG instance stored under Topic/<NAME> with
    Neo4JStorage as graph storage, runs a streaming hybrid query and
    prints the answer. Errors raised while initializing or querying are
    printed, not re-raised.
    """
    # Publish the Neo4j connection settings as environment variables.
    os.environ["NEO4J_URI"] = NEO4J_URI
    os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME
    os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD

    # Extra instructions passed to the query: keep LaTeX formulas verbatim,
    # keep images from the source material, drop unrelated content.
    user_prompt = (
        "\n 1、资料中提供化学反应方程式的一定要严格按提供的Latex公式输出绝对不允许对Latex公式进行修改 "
        "\n 2、如果资料中提供了图片的一定要严格按照原文提供图片输出不允许省略或不输出"
        "\n 3、资料中提到的知识内容需要判断是否与本次问题相关不相关的绝对不要输出"
    )

    # Bind rag before the try block: if initialize_rag() raises, the finally
    # clause must not fail with UnboundLocalError and hide the real error.
    rag = None
    try:
        # Use Neo4JStorage as the graph storage.
        rag = await initialize_rag(
            'Topic/' + data[idx]["NAME"], graph_storage="Neo4JStorage"
        )
        resp = await rag.aquery(
            data[idx]["Q"],
            # Modes tried here: hybrid, naive.
            param=QueryParam(mode="hybrid", stream=True, user_prompt=user_prompt),
        )
        # A streaming answer arrives as an async generator; anything else is
        # printed directly.
        if inspect.isasyncgen(resp):
            await print_stream(resp)
        else:
            print(resp)
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        # Only finalize storages that were actually created.
        if rag is not None:
            await rag.finalize_storages()


if __name__ == "__main__":
    configure_logging()
    asyncio.run(main())

@ -1,95 +0,0 @@
import os
import asyncio
from lightrag import LightRAG, QueryParam
from lightrag.utils import EmbeddingFunc
from lightrag.kg.shared_storage import initialize_pipeline_status
from Config.Config import EMBED_DIM, EMBED_MAX_TOKEN_SIZE, NEO4J_URI, NEO4J_USERNAME, NEO4J_PASSWORD
from Util.LightRagUtil import llm_model_func, embedding_func
# Working directory: a "myKG" folder next to this script, created when it
# does not exist yet.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
WORKING_DIR = os.path.join(ROOT_DIR, "myKG")
if not os.path.exists(WORKING_DIR):
    os.mkdir(WORKING_DIR)
print(f"WorkingDir: {WORKING_DIR}")

# Redis connection.
os.environ.update({"REDIS_URI": "redis://localhost:6379"})

# Neo4j batch sizes and connection settings (taken from Config).
BATCH_SIZE_NODES = 500
BATCH_SIZE_EDGES = 100
os.environ.update(
    {
        "NEO4J_URI": NEO4J_URI,
        "NEO4J_USERNAME": NEO4J_USERNAME,
        "NEO4J_PASSWORD": NEO4J_PASSWORD,
    }
)

# Milvus connection.
os.environ.update(
    {
        "MILVUS_URI": "http://localhost:19530",
        "MILVUS_USER": "root",
        "MILVUS_PASSWORD": "Milvus",
        "MILVUS_DB_NAME": "lightrag",
    }
)
async def initialize_rag():
    """Create the LightRAG instance used by this script and return it.

    The instance works in WORKING_DIR and is configured with
    RedisKVStorage (key-value and document status), Neo4JStorage (graph)
    and MilvusVectorDBStorage (vectors). Its storages and the pipeline
    status are initialized before it is returned.
    """
    embedder = EmbeddingFunc(
        embedding_dim=EMBED_DIM,
        max_token_size=EMBED_MAX_TOKEN_SIZE,
        func=embedding_func,
    )
    storage_backends = {
        "kv_storage": "RedisKVStorage",
        "graph_storage": "Neo4JStorage",
        "vector_storage": "MilvusVectorDBStorage",
        "doc_status_storage": "RedisKVStorage",
    }
    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=llm_model_func,
        llm_model_max_token_size=32768,
        embedding_func=embedder,
        chunk_token_size=512,
        chunk_overlap_token_size=256,
        **storage_backends,
    )

    await rag.initialize_storages()
    await initialize_pipeline_status()
    return rag
def main():
    """Index ./book.txt and print the answer to one question in four modes.

    The same question is asked with the naive, local, global and hybrid
    query modes, in that order.
    """
    # Build the RAG instance.
    rag = asyncio.run(initialize_rag())

    with open("./book.txt", "r", encoding="utf-8") as book:
        rag.insert(book.read())

    question = "What are the top themes in this story?"
    for mode in ("naive", "local", "global", "hybrid"):
        print(rag.query(question, param=QueryParam(mode=mode)))


if __name__ == "__main__":
    main()
Loading…
Cancel
Save