diff --git a/dsRag/Start.py b/dsRag/Start.py index 34cf1968..e5b85b70 100644 --- a/dsRag/Start.py +++ b/dsRag/Start.py @@ -1,19 +1,73 @@ +import threading +import time +from contextlib import asynccontextmanager from pathlib import Path import uvicorn from fastapi import FastAPI, UploadFile, File, HTTPException +# 在文件开头添加导入 from pymysql.cursors import DictCursor from Dao.KbDao import KbDao from Model.KbModel import KbModel, KbFileModel +from Test.T9_TestReadPptx import extract_text_from_pptx +from Util import PdfUtil, WordUtil from Util.MySQLUtil import init_mysql_pool -from contextlib import asynccontextmanager + +import logging +from logging.handlers import RotatingFileHandler + +# 确保logger已在文件开头正确初始化 +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +handler = RotatingFileHandler('Logs/document_processor.log', maxBytes=1024*1024, backupCount=5) +handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) +logger.addHandler(handler) @asynccontextmanager async def lifespan(app: FastAPI): # 启动时初始化数据库连接池 app.state.kb_dao = KbDao(await init_mysql_pool()) + + # 启动文档处理线程 + # 修改函数定义 + async def document_processor(): + while True: + try: + # 获取未处理文档 + # 处理文档 + # 保存到ES + await asyncio.sleep(10) + except Exception as e: + logger.error(f"文档处理出错: {e}") + await asyncio.sleep(10) + + time.sleep(10) # 每10秒检查一次 + + # 修改线程启动部分 + # 修改线程启动方式 + import asyncio + + def run_async_in_thread(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(document_processor()) + finally: + loop.close() + + processor_thread = threading.Thread( + target=run_async_in_thread, + daemon=True + ) + processor_thread.start() + + # 启动文档处理任务 + task = asyncio.create_task(document_processor()) yield + # 关闭时取消任务 + task.cancel() + # 关闭时清理资源 await app.state.kb_dao.mysql_pool.close() @@ -192,4 +246,12 @@ async def upload_file( } if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file + uvicorn.run(app, host="0.0.0.0", port=8000) + + +# 确保logger已在文件开头正确初始化 +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +handler = RotatingFileHandler('Logs/document_processor.log', maxBytes=1024*1024, backupCount=5) +handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) +logger.addHandler(handler) \ No newline at end of file diff --git a/dsRag/Test/__pycache__/T9_TestReadPptx.cpython-310.pyc b/dsRag/Test/__pycache__/T9_TestReadPptx.cpython-310.pyc new file mode 100644 index 00000000..64d7ffcd Binary files /dev/null and b/dsRag/Test/__pycache__/T9_TestReadPptx.cpython-310.pyc differ diff --git a/dsRag/Util/__pycache__/WordUtil.cpython-310.pyc b/dsRag/Util/__pycache__/WordUtil.cpython-310.pyc index 9f7445b8..3094c6c7 100644 Binary files a/dsRag/Util/__pycache__/WordUtil.cpython-310.pyc and b/dsRag/Util/__pycache__/WordUtil.cpython-310.pyc differ