From 1a61b6a67760d3640eedb3c1e421c6605c9c6b80 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Tue, 24 Jun 2025 19:19:53 +0800 Subject: [PATCH] 'commit' --- dsRag/Start.py | 66 +++++++++++++++++- .../T9_TestReadPptx.cpython-310.pyc | Bin 0 -> 864 bytes .../Util/__pycache__/WordUtil.cpython-310.pyc | Bin 946 -> 946 bytes 3 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 dsRag/Test/__pycache__/T9_TestReadPptx.cpython-310.pyc diff --git a/dsRag/Start.py b/dsRag/Start.py index 34cf1968..e5b85b70 100644 --- a/dsRag/Start.py +++ b/dsRag/Start.py @@ -1,19 +1,73 @@ +import threading +import time +from contextlib import asynccontextmanager from pathlib import Path import uvicorn from fastapi import FastAPI, UploadFile, File, HTTPException +# 在文件开头添加导入 from pymysql.cursors import DictCursor from Dao.KbDao import KbDao from Model.KbModel import KbModel, KbFileModel +from Test.T9_TestReadPptx import extract_text_from_pptx +from Util import PdfUtil, WordUtil from Util.MySQLUtil import init_mysql_pool -from contextlib import asynccontextmanager + +import logging +from logging.handlers import RotatingFileHandler + +# 确保logger已在文件开头正确初始化 +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +handler = RotatingFileHandler('Logs/document_processor.log', maxBytes=1024*1024, backupCount=5) +handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) +logger.addHandler(handler) @asynccontextmanager async def lifespan(app: FastAPI): # 启动时初始化数据库连接池 app.state.kb_dao = KbDao(await init_mysql_pool()) + + # 启动文档处理线程 + # 修改函数定义 + async def document_processor(): + while True: + try: + # 获取未处理文档 + # 处理文档 + # 保存到ES + await asyncio.sleep(10) + except Exception as e: + logger.error(f"文档处理出错: {e}") + await asyncio.sleep(10) + + time.sleep(10) # 每10秒检查一次 + + # 修改线程启动部分 + # 修改线程启动方式 + import asyncio + + def run_async_in_thread(): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + loop.run_until_complete(document_processor()) + finally: + loop.close() + + processor_thread = threading.Thread( + target=run_async_in_thread, + daemon=True + ) + processor_thread.start() + + # 启动文档处理任务 + task = asyncio.create_task(document_processor()) yield + # 关闭时取消任务 + task.cancel() + # 关闭时清理资源 await app.state.kb_dao.mysql_pool.close() @@ -192,4 +246,12 @@ async def upload_file( } if __name__ == "__main__": - uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file + uvicorn.run(app, host="0.0.0.0", port=8000) + + +# 确保logger已在文件开头正确初始化 +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) +handler = RotatingFileHandler('Logs/document_processor.log', maxBytes=1024*1024, backupCount=5) +handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) +logger.addHandler(handler) \ No newline at end of file diff --git a/dsRag/Test/__pycache__/T9_TestReadPptx.cpython-310.pyc b/dsRag/Test/__pycache__/T9_TestReadPptx.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..64d7ffcd2d68b2c977f122a15880b0986879bcd0 GIT binary patch literal 864 zcmYjPO=}ZD7@nE^$Yz@sL@D+bZ$>ah5J3?U>{&uB2uo#IGSg<$&F;E0fmAn8NLwvZ z+e0n3Mo2DN!S*6ZZTuyBHQC_FtB42RiBuxJE33Wv+ONrBIb>tH@HBM)f5UU8fmKXG%5GF__-P(G{jLy@jnT zGpW(S7%`J&Wu(dj$Z3}8ZxA)#D*t-2Ul@BbWrl8;cYQ7#&&!9)q7wL5!%#HL??46> zaFQ!Uj5A+2!VUcUWh~pLrbEG6P;I_5Mrj{a7TzJf@eU9 z#zo|YiSC3U^J$`11J^I;Np`{Ym>oKzVku$7t(*k4%YiRI1xp)2!d{6u!uf!r9Kij$ zHgjv9@`pjR1fdiKRaO>|%{q(obBv4mxtq3(v&^9-xJ@x!P9_04a>~LU9kCDvHCtZP z060Dg&LAo_6_c3Q#E@aKLVlZiG$Gj=wq0{v-?r;Fpez=r=Ne+Vf7~7%KOb(dr>#%v z-buRlK7F+}ID9ue>OymHvNPN{==WX>dz(dhQ2ioo{j271fAy@CKfdnQGjejIdA=@x zaK3-MIXHYhXde{Dl8kNBplsWTDuG6FCnVtix%p0w*|wFHv$j+e7J{US&KfQk{PYFF s$o0iYzB6td&Pt5iwSd+=c1M0VE_tdbrx@5!jB_f>&$O zsL67RB_%((qKFB|DPjQ;AmJiuR6+*Gy~SaZo1apelWNBZ