This commit is contained in:
2025-08-26 16:05:06 +08:00
parent 6ed079c9b4
commit 855c2052b3
1041 changed files with 9667 additions and 2010802 deletions

View File

@@ -4,7 +4,8 @@ import logging
from raganything import RAGAnything, RAGAnythingConfig
from lightrag.llm.openai import openai_complete_if_cache, openai_embed
from lightrag.utils import EmbeddingFunc
from logging.handlers import RotatingFileHandler# 导入RotatingFileHandler用于日志轮转
from logging.handlers import RotatingFileHandler # 导入RotatingFileHandler用于日志轮转
import Config.Config
# Set the root logger's level to INFO so that log records from all child loggers are captured.
@@ -17,35 +18,33 @@ if not root_logger.handlers:
handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
root_logger.addHandler(handler)
# Also keep the original 'ragAnything' logger configuration.
# NOTE(review): this view interleaves the removed and added lines of a diff, so
# the handler setup below appears twice; the only visible difference between the
# two copies is the spacing in `200 * 1024` — confirm against the real file.
logger = logging.getLogger('ragAnything')
logger.setLevel(logging.INFO)
if not logger.handlers:
# Console output handler
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
logger.addHandler(console_handler)
# Rotating file handler, capped at roughly 200 KB per log file
file_handler = RotatingFileHandler(
'lightrag.log',
maxBytes=200*1024, # 200KB
backupCount=5, # keep at most 5 backup files
encoding='utf-8',
delay=True # defer creating the file until there is log output
)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
logger.addHandler(file_handler)
# Console output handler
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
logger.addHandler(console_handler)
# Rotating file handler, capped at roughly 200 KB per log file
file_handler = RotatingFileHandler(
'lightrag.log',
maxBytes=200 * 1024, # 200KB
backupCount=5, # keep at most 5 backup files
encoding='utf-8',
delay=True # defer creating the file until there is log output
)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
logger.addHandler(file_handler)
async def main():
async def train(file_path, output_dir, working_dir):
# 设置 API 配置
api_key = Config.Config.ALY_LLM_API_KEY
base_url = Config.Config.ALY_LLM_BASE_URL
# 创建 RAGAnything 配置
config = RAGAnythingConfig(
working_dir="./Topic/Geogebra",
working_dir=working_dir,
parser="mineru", # 选择解析器mineru 或 docling
parse_method="auto", # 解析方法auto, ocr 或 txt
enable_image_processing=True,
@@ -136,19 +135,14 @@ async def main():
# 处理文档
await rag.process_document_complete(
file_path="./Doc/GeoGebra.pdf",
output_dir="./Topic/Geogebra",
file_path=file_path,
output_dir=output_dir,
parse_method="auto"
)
# 查询处理后的内容
# 纯文本查询 - 基本知识库搜索
# text_result = await rag.aquery(
# "文档的主要内容是什么?",
# mode="hybrid"
# )
# print("文本查询结果:", text_result)
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
# NOTE(review): this call looks like the pre-change line of the diff (the
# coroutine is declared as both `main` and `train` above) — confirm which one
# the real file keeps.
asyncio.run(main())
# Passed to train() as working_dir, which hands it to RAGAnythingConfig.
working_dir = "./Working"
# Passed to train() as output_dir, which hands it to process_document_complete.
output_dir = "./Topic/HuangWanQiao"
# Document path passed to train() as file_path.
file_path = "./Doc/黄琬乔2023蓝桥杯省赛准考证.pdf"
asyncio.run(train(file_path, output_dir, working_dir))