From 2b433cdf07e83f7b0d9c0ff31e308f915d828ee8 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Fri, 11 Jul 2025 16:32:02 +0800 Subject: [PATCH] 'commit' --- dsLightRag/Test/T1_WriteToPg.py | 37 +----- dsLightRag/Test/T2_ReadFromPg.py | 48 +------- dsLightRag/Util/LightRagUtil.py | 36 ++++++ dsLightRag/Util/Neo4jExecutor.py | 109 ------------------ .../__pycache__/Neo4jExecutor.cpython-310.pyc | Bin 4193 -> 0 bytes 5 files changed, 43 insertions(+), 187 deletions(-) delete mode 100644 dsLightRag/Util/Neo4jExecutor.py delete mode 100644 dsLightRag/Util/__pycache__/Neo4jExecutor.cpython-310.pyc diff --git a/dsLightRag/Test/T1_WriteToPg.py b/dsLightRag/Test/T1_WriteToPg.py index 09cc8d03..5357392e 100644 --- a/dsLightRag/Test/T1_WriteToPg.py +++ b/dsLightRag/Test/T1_WriteToPg.py @@ -7,7 +7,7 @@ from lightrag.kg.shared_storage import initialize_pipeline_status from lightrag.utils import EmbeddingFunc from Config.Config import EMBED_DIM, EMBED_MAX_TOKEN_SIZE, LLM_MODEL_NAME -from Util.LightRagUtil import embedding_func, llm_model_func +from Util.LightRagUtil import embedding_func, llm_model_func, initialize_pg_rag # 在程序开始时添加以下配置 logging.basicConfig( @@ -23,44 +23,11 @@ handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - logger.addHandler(handler) WORKING_DIR = f"./dickens-pg" - logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO) -# AGE -os.environ["AGE_GRAPH_NAME"] = "dickens" -os.environ["POSTGRES_HOST"] = "10.10.14.208" -os.environ["POSTGRES_PORT"] = "5432" -os.environ["POSTGRES_USER"] = "postgres" -os.environ["POSTGRES_PASSWORD"] = "postgres" -os.environ["POSTGRES_DATABASE"] = "rag" - -async def initialize_pg_rag(): - rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=llm_model_func, - llm_model_name=LLM_MODEL_NAME, - llm_model_max_async=4, - llm_model_max_token_size=32768, - enable_llm_cache_for_entity_extract=True, - embedding_func=EmbeddingFunc( - embedding_dim=EMBED_DIM, - max_token_size=EMBED_MAX_TOKEN_SIZE, - func=embedding_func - ), - kv_storage="PGKVStorage", - doc_status_storage="PGDocStatusStorage", - graph_storage="PGGraphStorage", - vector_storage="PGVectorStorage", - auto_manage_storages_states=False, - ) - await rag.initialize_storages() - await initialize_pipeline_status() - return rag - - async def main(): try: - rag = await initialize_pg_rag() + rag = await initialize_pg_rag(WORKING_DIR) with open(f"../Txt/sushi.txt", "r", encoding="utf-8") as f: await rag.ainsert(f.read()) finally: diff --git a/dsLightRag/Test/T2_ReadFromPg.py b/dsLightRag/Test/T2_ReadFromPg.py index 4d9e766b..40df6a81 100644 --- a/dsLightRag/Test/T2_ReadFromPg.py +++ b/dsLightRag/Test/T2_ReadFromPg.py @@ -8,7 +8,7 @@ from lightrag.kg.shared_storage import initialize_pipeline_status from lightrag.utils import EmbeddingFunc from Config.Config import LLM_MODEL_NAME, EMBED_DIM, EMBED_MAX_TOKEN_SIZE -from Util.LightRagUtil import configure_logging, print_stream, llm_model_func, embedding_func +from Util.LightRagUtil import configure_logging, print_stream, llm_model_func, embedding_func, initialize_pg_rag # 在程序开始时添加以下配置 logging.basicConfig( @@ -23,56 +23,18 @@ handler = logging.StreamHandler() handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) logger.addHandler(handler) -ROOT_DIR = '.' -WORKING_DIR = f"{ROOT_DIR}/dickens-pg" +WORKING_DIR = f"./dickens-pg" logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO) -if not os.path.exists(WORKING_DIR): - os.mkdir(WORKING_DIR) - -# AGE -os.environ["AGE_GRAPH_NAME"] = "dickens" -os.environ["POSTGRES_HOST"] = "10.10.14.208" -os.environ["POSTGRES_PORT"] = "5432" -os.environ["POSTGRES_USER"] = "postgres" -os.environ["POSTGRES_PASSWORD"] = "postgres" -os.environ["POSTGRES_DATABASE"] = "rag" - - -async def initialize_pg_rag(): - rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=llm_model_func, - llm_model_name=LLM_MODEL_NAME, - llm_model_max_async=4, - llm_model_max_token_size=32768, - enable_llm_cache_for_entity_extract=True, - embedding_func=EmbeddingFunc( - embedding_dim=EMBED_DIM, - max_token_size=EMBED_MAX_TOKEN_SIZE, - func=embedding_func - ), - kv_storage="PGKVStorage", - doc_status_storage="PGDocStatusStorage", - graph_storage="PGGraphStorage", - vector_storage="PGVectorStorage", - auto_manage_storages_states=False, - ) - - await rag.initialize_storages() - await initialize_pipeline_status() - - return rag async def main(): try: - rag = await initialize_pg_rag() + rag = await initialize_pg_rag(WORKING_DIR) resp = await rag.aquery( - #"苏轼的家人都有谁?", - "大灰怎么了?", + # "苏轼的家人都有谁?", + "苏轼与美食", param=QueryParam(mode="hybrid", stream=True), - # hybrid naive ) if inspect.isasyncgen(resp): await print_stream(resp) diff --git a/dsLightRag/Util/LightRagUtil.py b/dsLightRag/Util/LightRagUtil.py index 397b3742..d020de2a 100644 --- a/dsLightRag/Util/LightRagUtil.py +++ b/dsLightRag/Util/LightRagUtil.py @@ -143,3 +143,39 @@ def create_embedding_func(): base_url=EMBED_BASE_URL, ), ) + + + +# AGE +os.environ["AGE_GRAPH_NAME"] = "dickens" +os.environ["POSTGRES_HOST"] = "10.10.14.208" +os.environ["POSTGRES_PORT"] = "5432" +os.environ["POSTGRES_USER"] = "postgres" +os.environ["POSTGRES_PASSWORD"] = "postgres" +os.environ["POSTGRES_DATABASE"] = "rag" + + +async def initialize_pg_rag(WORKING_DIR): + rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=llm_model_func, + llm_model_name=LLM_MODEL_NAME, + llm_model_max_async=4, + llm_model_max_token_size=32768, + enable_llm_cache_for_entity_extract=True, + embedding_func=EmbeddingFunc( + embedding_dim=EMBED_DIM, + max_token_size=EMBED_MAX_TOKEN_SIZE, + func=embedding_func + ), + kv_storage="PGKVStorage", + doc_status_storage="PGDocStatusStorage", + graph_storage="PGGraphStorage", + vector_storage="PGVectorStorage", + auto_manage_storages_states=False, + ) + + await rag.initialize_storages() + await initialize_pipeline_status() + + return rag \ No newline at end of file diff --git a/dsLightRag/Util/Neo4jExecutor.py b/dsLightRag/Util/Neo4jExecutor.py deleted file mode 100644 index 28f55cdb..00000000 --- a/dsLightRag/Util/Neo4jExecutor.py +++ /dev/null @@ -1,109 +0,0 @@ -import re -from typing import List, Tuple - -from py2neo import Graph, Node, Relationship, Subgraph -from Config import Config - -def clear(db): - # 清空数据 - db.run("MATCH (n) DETACH DELETE n") - - # 分步删除约束和索引 - try: - # 删除约束 - constraints = db.run("SHOW CONSTRAINTS YIELD name").data() - for constr in constraints: - db.run(f"DROP CONSTRAINT `{constr['name']}`") - - # 删除索引 - indexes = db.run("SHOW INDEXES YIELD name, type WHERE type <> 'LOOKUP'").data() - for idx in indexes: - db.run(f"DROP INDEX `{idx['name']}`") - except Exception as e: - print(f"删除操作失败: {e}") - -def create_subgraph(db: Graph, nodes: List[Node], relations: List[Tuple[Node, str, Node]]) -> None: - """统一创建子图""" - subgraph = Subgraph( - nodes=nodes, - relationships=[Relationship(start, rel_type, end) for start, rel_type, end in relations] - ) - db.create(subgraph) - -def tx_create(db: Graph, nodes: List[Node], relations: List[Tuple[Node, str, Node]]) -> None: - """事务方式创建数据""" - try: - tx = db.begin() - subgraph = Subgraph( - nodes=nodes, - relationships=[Relationship(start, rel_type, end) for start, rel_type, end in relations] - ) - tx.create(subgraph) - db.commit(tx) - except Exception as e: - db.rollback(tx) - print(f"事务操作失败: {str(e)}") - raise - -class Neo4jExecutor: - # 添加类变量存储连接配置 - NEO4J_URI = Config.NEO4J_URI - NEO4J_AUTH = Config.NEO4J_AUTH - - def __init__(self, uri=None, auth=None): - # 使用默认配置或传入参数 - self.graph = Graph(uri or self.NEO4J_URI, - auth=auth or self.NEO4J_AUTH) - - @classmethod - def create_default(cls): - """使用默认配置创建执行器""" - return cls(cls.NEO4J_URI, cls.NEO4J_AUTH) - - # 新增文本执行方法 - def execute_cypher_text(self, cypher_text: str) -> dict: - """直接执行Cypher文本""" - stats = {'total': 0, 'success': 0, 'failed': 0} - try: - statements = re.split(r';\s*\n', cypher_text) - statements = [s.strip() for s in statements if s.strip()] - - stats['total'] = len(statements) - - for stmt in statements: - try: - self.graph.run(stmt) - stats['success'] += 1 - except Exception as e: - stats['failed'] += 1 - print(f"执行失败: {stmt[:50]}... \n错误: {str(e)[:100]}") - - return stats - except Exception as e: - print(f"执行失败: {stmt[:100]}... \n完整错误: {str(e)}") # 原为str(e)[:100] - return stats - - def execute_cypher_file(self, file_path: str) -> dict: # 确保方法名称正确 - """执行Cypher文件""" - stats = {'total': 0, 'success': 0, 'failed': 0} - try: - with open(file_path, 'r', encoding='utf-8') as f: - cypher_script = f.read() - statements = re.split(r';\s*\n', cypher_script) - statements = [s.strip() for s in statements if s.strip()] - - stats['total'] = len(statements) - - for stmt in statements: - try: - self.graph.run(stmt) - stats['success'] += 1 - except Exception as e: - stats['failed'] += 1 - print(f"执行失败: {stmt[:50]}... \n错误: {str(e)[:100]}") - - return stats - - except Exception as e: - print(f"文件错误: {str(e)}") - return stats \ No newline at end of file diff --git a/dsLightRag/Util/__pycache__/Neo4jExecutor.cpython-310.pyc b/dsLightRag/Util/__pycache__/Neo4jExecutor.cpython-310.pyc deleted file mode 100644 index d74b814483ae2ea6c12109956f9a57b089cedf96..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4193 zcma)9|8pBh6~Ep4qLXaNj#EO?x*fHJCW6#7B_$;_!5Ld5X(L=rd{1{dEr7h9fr2eoMj^&VIgiams9TSF<2O$CUVc>t43f-CsyI%3`1!J!Y^T*1K%5-X)rjF*0E!c5K3)jZ}gvYc%gmWlvVh9#2@x zJ|?o|Ic23vjH~W(?qy}ZRP^WaW#vv(&duUzf+%OWRGi7r<_Ip+xEy$*O@`t2e+!~! ztb$e62r;S$bepb`RkCK(NsWvcXQFkgd(9eU)*jW-1| zv6EW{<7btH1NX_<>Onp6L~11YgXGpp`{Q!4%;QtXljF&tId~+#cXVv*l~X78R=f3z zdRF{w`x zNobAcXXoU2c6R!d%zM)*UOIR_xxjN3SrP-~MP=tao)sbV_M;#ol$tT>lHDX`qI4d_ zXg_h^pbx~CVORi!3}qJ~u6z|1&A?KQ;Ibl$ITMDPmjw;HbtHWS8q%l{Mi^Z-YIMX{ zBFtQ&bra%&)O5r!3u4-?^nvQ#Tfe#Ump9hmzPx_x;hF48T0^Fxl*Bgj@TFcNVQGVERj~*4-R7d5d+F);C~Er%R7xr7Ce9}@OYDW$HW##kxeTM#euN)@ z*OUO?&p5-XnX9yBA$&m(Ad4*krmcZ#EgotB6QOMfqtaoP4Atx<(uC=cKp#P>q3Kl6 z40k^M>H1IBHZJ~k{crCEPThF_j~gpD9_m!_7|aSx(2p%Ne}_fHlhB7J7EghwJ}wtB zfr++rhjiz`Z=cP^WIO~mlk1W+cN zPTx_vAfAC?fuQf$tqQmHhx_*NR1|PV;fLOfSiK}hBBT%A0d4;k&w@e@?zRi#eh6-V z90WJggc)fdnDbWJ0txiQqKqip0S0{?++f1)2i09| zyB=t&wS^aw9G+kp=b{&D@H>w;S%MsqAQg1Xo!kGo``(9}w|;f+#?6S85Ik>E&HB+G z2|!OQ=(%dOIJ*ne!F)f{PfQCnIu&rey|~o)!tU0Jjrpvnl~(jhILE;XK(7M)-&!rv ztCP&#%QrWE_Q8gB)UY<>#>KZbuKZr0=dYW}mL-|>RLHO7a@+?#oyq1s&h9%eP5WP) zF17^=+8rZS6AWTi6HEka9!#Ht2G2HnK~53*GGu1Y#2pCI&(L2je+7d9%PsZ9b0!n7sxD2I7&;_b%Q0=%d5&O$I%c zoz3p`8}Dqqf3rCVd5{VY2Pu1bsZ5U-1+F46>~td3=}&weULpN`6MJ6-q1hBFln9Wj zxL4e7FI&0#Ut z)4*4+4A(+XGsW1B&)_Ro54|3m-^cW`i7#rfm<2(+WFm!67zYghhZkTk_&W_Z(0WYM zdT8<2ja}@3ul5DsX_9#hb$V?;UF#c(ow`!%z}uZ$f7W;t-v$dSBC2*p&OG}fWJ;G8 zb0wB9&Wi8A&_mHCj=*S;QNN3=qpepgas}398c1ibfiCjfhMA@wOO3Z!QsGcW7=1uo}GOdLW5OdIfrC!dpI4Bv6< zTe$O6|Kf3wWc^+cuHjIJ!2c2(sv~>XE`g(o9&L5yK<$nph51^TAkt2G@%bVz>3T!h x*p0jy)OC8B`fm(wMVHIE&eBV6w-eV