import logging
import logging.config
import os

import numpy as np

from lightrag import LightRAG
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.rerank import custom_rerank
from lightrag.utils import EmbeddingFunc, logger, set_verbose_debug

# Alibaba Cloud access credentials (secrets redacted -- supply via environment
# variables instead of hardcoding them in source)
ALY_AK = os.getenv("ALY_AK", "")
ALY_SK = os.getenv("ALY_SK", "")

# LLM -- the official DeepSeek API; use this one for training runs
LLM_API_KEY = os.getenv("LLM_API_KEY", "")
LLM_BASE_URL = "https://api.deepseek.com"
LLM_MODEL_NAME = "deepseek-chat"

# # Alibaba Cloud's hosted LLM service. Caution: when processing text material,
# # Alibaba Cloud's "Green Net" content filter is easily triggered, which makes
# # data ingestion fail.
# LLM_API_KEY = os.getenv("LLM_API_KEY", "")
# LLM_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
# LLM_MODEL_NAME = "qwen-plus"  # Do not use Tongyi Qianwen (Qwen): it gets chemical equations wrong!
# # LLM_MODEL_NAME = "deepseek-v3"

# Embedding model
EMBED_MODEL_NAME = "BAAI/bge-m3"
EMBED_API_KEY = os.getenv("EMBED_API_KEY", "")
EMBED_BASE_URL = "https://api.siliconflow.cn/v1"
EMBED_DIM = 1024
EMBED_MAX_TOKEN_SIZE = 8192

# Rerank model
RERANK_MODEL = "BAAI/bge-reranker-v2-m3"
RERANK_BASE_URL = "https://api.siliconflow.cn/v1/rerank"
RERANK_BINDING_API_KEY = os.getenv("RERANK_BINDING_API_KEY", "")

# Graph database
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "")
NEO4J_AUTH = (NEO4J_USERNAME, NEO4J_PASSWORD)

# PostgreSQL configuration
POSTGRES_HOST = "10.10.14.208"
POSTGRES_PORT = 5432
POSTGRES_USER = "postgres"
POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "postgres")
POSTGRES_DATABASE = "rag"
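
# --- Sketch: wiring the constants above into LightRAG's storage backends ---
# This helper is illustrative and not part of the original module. It assumes
# (per LightRAG's kg implementations) that Neo4JStorage and the PostgreSQL
# storages read their connection settings from environment variables with the
# names below; verify these against the LightRAG version you run before
# relying on it.
def apply_storage_env():
    for key, value in {
        "NEO4J_URI": NEO4J_URI,
        "NEO4J_USERNAME": NEO4J_USERNAME,
        "NEO4J_PASSWORD": NEO4J_PASSWORD,
        "POSTGRES_HOST": POSTGRES_HOST,
        "POSTGRES_PORT": str(POSTGRES_PORT),
        "POSTGRES_USER": POSTGRES_USER,
        "POSTGRES_PASSWORD": POSTGRES_PASSWORD,
        "POSTGRES_DATABASE": POSTGRES_DATABASE,
    }.items():
        # setdefault keeps any value the operator already exported
        os.environ.setdefault(key, value)
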
async def print_stream(stream):
    """Print an async token stream as chunks arrive."""
    async for chunk in stream:
        if chunk:
            print(chunk, end="", flush=True)


def configure_logging():
    """Route uvicorn/lightrag logging to the console and a rotating log file."""
    # Clear any handlers/filters left over from a previous configuration
    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "lightrag"]:
        logger_instance = logging.getLogger(logger_name)
        logger_instance.handlers = []
        logger_instance.filters = []

    log_dir = os.getenv("LOG_DIR", os.getcwd())
    log_file_path = os.path.abspath(os.path.join(log_dir, "./Logs/lightrag.log"))

    print(f"\nLightRAG log file: {log_file_path}\n")
    # Create the directory that will hold the log file (the original passed
    # log_dir's parent, so the ./Logs directory was never created)
    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

    # Rotation settings: 10 MiB per file, 5 backups by default
    log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))
    log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))

    logging.config.dictConfig(
        {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "default": {
                    "format": "%(levelname)s: %(message)s",
                },
                "detailed": {
                    "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
                },
            },
            "handlers": {
                "console": {
                    "formatter": "default",
                    "class": "logging.StreamHandler",
                    "stream": "ext://sys.stderr",
                },
                "file": {
                    "formatter": "detailed",
                    "class": "logging.handlers.RotatingFileHandler",
                    "filename": log_file_path,
                    "maxBytes": log_max_bytes,
                    "backupCount": log_backup_count,
                    "encoding": "utf-8",
                },
            },
            "loggers": {
                "lightrag": {
                    "handlers": ["console", "file"],
                    "level": "INFO",
                    "propagate": False,
                },
            },
        }
    )

    logger.setLevel(logging.INFO)
    set_verbose_debug(os.getenv("VERBOSE_DEBUG", "false").lower() == "true")


async def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs) -> str:
    return await openai_complete_if_cache(
        os.getenv("LLM_MODEL", LLM_MODEL_NAME),
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages or [],
        api_key=LLM_API_KEY,
        base_url=LLM_BASE_URL,
        **kwargs,
    )


async def embedding_func(texts: list[str]) -> np.ndarray:
    return await openai_embed(
        texts,
        model=EMBED_MODEL_NAME,
        api_key=EMBED_API_KEY,
        base_url=EMBED_BASE_URL,
    )


async def rerank_func(query: str, documents: list, top_k: int | None = None, **kwargs):
    return await custom_rerank(
        query=query,
        documents=documents,
        model=RERANK_MODEL,
        base_url=RERANK_BASE_URL,
        api_key=RERANK_BINDING_API_KEY,
        top_k=top_k or 10,
        **kwargs,
    )


async def initialize_rag(working_dir, graph_storage=None):
    """Build and initialize a LightRAG instance.

    Made async: LightRAG.initialize_storages() and initialize_pipeline_status()
    are coroutines and must be awaited -- the original called them without
    await, so the storages were never actually initialized.
    """
    if graph_storage is None:
        graph_storage = "NetworkXStorage"
    rag = LightRAG(
        working_dir=working_dir,
        llm_model_func=llm_model_func,
        graph_storage=graph_storage,
        embedding_func=EmbeddingFunc(
            embedding_dim=EMBED_DIM,
            max_token_size=EMBED_MAX_TOKEN_SIZE,
            func=embedding_func,
        ),
        rerank_model_func=rerank_func,
    )
    await rag.initialize_storages()
    await initialize_pipeline_status()
    return rag


def create_llm_model_func():
    # Avoid the mutable default argument ([]); normalize None to [] inside
    def llm_model_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        return openai_complete_if_cache(
            LLM_MODEL_NAME,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages or [],
            api_key=LLM_API_KEY,
            base_url=LLM_BASE_URL,
            **kwargs,
        )

    return llm_model_func


def create_embedding_func():
    return EmbeddingFunc(
        embedding_dim=EMBED_DIM,
        max_token_size=EMBED_MAX_TOKEN_SIZE,
        func=lambda texts: openai_embed(
            texts,
            model=EMBED_MODEL_NAME,
            api_key=EMBED_API_KEY,
            base_url=EMBED_BASE_URL,
        ),
    )
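

# --- Usage sketch (not part of the original module) ---
# A minimal end-to-end driver showing how the helpers above fit together.
# The working directory and the query string are placeholders; swap in your
# own. It assumes LightRAG's aquery() returns an async iterator when
# QueryParam(stream=True) is set, and that finalize_storages() is the
# matching cleanup coroutine -- check both against your installed version.
if __name__ == "__main__":
    import asyncio

    from lightrag import QueryParam

    async def main():
        configure_logging()
        rag = await initialize_rag(working_dir="./rag_storage")
        try:
            resp = await rag.aquery(
                "What does this knowledge base cover?",
                param=QueryParam(mode="hybrid", stream=True),
            )
            # Streamed responses come back as an async iterator
            if hasattr(resp, "__aiter__"):
                await print_stream(resp)
            else:
                print(resp)
        finally:
            await rag.finalize_storages()

    asyncio.run(main())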