From 67ca6560ef5d5414a0f6b3ec4c16239ef0270faa Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Thu, 27 Mar 2025 09:40:25 +0800 Subject: [PATCH] 'commit' --- AI/WxMini/Start.py | 96 +++--------------------------------- AI/WxMini/Utils/ImageUtil.py | 83 +++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 89 deletions(-) create mode 100644 AI/WxMini/Utils/ImageUtil.py diff --git a/AI/WxMini/Start.py b/AI/WxMini/Start.py index ca10724f..87242355 100644 --- a/AI/WxMini/Start.py +++ b/AI/WxMini/Start.py @@ -1,6 +1,5 @@ import asyncio import logging -import re import time import uuid from contextlib import asynccontextmanager @@ -23,6 +22,7 @@ from WxMini.Utils.MySQLUtil import init_mysql_pool, save_chat_to_mysql, get_chat from WxMini.Utils.MySQLUtil import update_risk, get_last_chat_log_id from WxMini.Utils.OssUtil import upload_mp3_to_oss_from_memory, get_sts_token from WxMini.Utils.TtsUtil import TTS +from WxMini.Utils.ImageUtil import * # 配置日志 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") @@ -391,6 +391,7 @@ async def get_risk_chat_logs( risk_flag: int = Query(..., description="风险标志(1 表示有风险,0 表示无风险 ,2:处理完毕)"), page: int = Query(default=1, ge=1, description="当前页码(默认值为 1)"), page_size: int = Query(default=10, ge=1, le=100, description="每页记录数(默认值为 10,最大值为 100)"), + person_id: str = Query(..., description="用户会话 ID"), current_user: dict = Depends(get_current_user) ): """ @@ -404,8 +405,7 @@ async def get_risk_chat_logs( offset = (page - 1) * page_size # 调用 get_chat_logs_by_risk_flag 方法 - logs, total = await get_chat_logs_by_risk_flag(app.state.mysql_pool, risk_flag, current_user["person_id"], offset, - page_size) + logs, total = await get_chat_logs_by_risk_flag(app.state.mysql_pool, risk_flag, person_id, offset, page_size) if not logs: return { "success": False, @@ -507,88 +507,6 @@ async def get_oss_upload_token(current_user: dict = Depends(get_current_user)): } -async def is_text_dominant(image_url): - """ - 判断图片是否主要是文字内容 - :param image_url: 图片 URL - :return: True(主要是文字) / False(主要是物体/场景) - """ - completion = await client.chat.completions.create( - model="qwen-vl-ocr", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": image_url, - "min_pixels": 28 * 28 * 4, - "max_pixels": 28 * 28 * 1280 - }, - {"type": "text", "text": "Read all the text in the image."}, - ] - } - ], - stream=False - ) - text = completion.choices[0].message.content - - # 判断是否只有英文和数字 - if re.match(r'^[A-Za-z0-9\s]+$', text): - print("识别到的内容只有英文和数字,可能是无意义的字符,调用识别内容功能。") - return False - return True - - -async def recognize_text(image_url): - """ - 识别图片中的文字,流式输出 - """ - completion = await client.chat.completions.create( - model="qwen-vl-ocr", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": image_url, - "min_pixels": 28 * 28 * 4, - "max_pixels": 28 * 28 * 1280 - }, - {"type": "text", "text": "Read all the text in the image."}, - ] - } - ], - stream=True - ) - async for chunk in completion: - if chunk.choices[0].delta.content is not None: - for char in chunk.choices[0].delta.content: - if char != ' ': - yield char - time.sleep(0.1) - - -async def recognize_content(image_url): - """ - 识别图片中的内容,流式输出 - """ - completion = await client.chat.completions.create( - model="qwen-vl-plus", - messages=[{"role": "user", "content": [ - {"type": "text", "text": "这是什么"}, - {"type": "image_url", "image_url": {"url": image_url}} - ]}], - stream=True - ) - async for chunk in completion: - if chunk.choices[0].delta.content is not None: - for char in chunk.choices[0].delta.content: - yield char - time.sleep(0.1) - - @app.get("/aichat/process_image") async def process_image(image_url: str, current_user: dict = Depends(get_current_user)): logger.info(f"current_user:{current_user['login_name']}") @@ -598,12 +516,12 @@ async def process_image(image_url: str, current_user: dict = Depends(get_current :return: 流式输出结果 """ try: - if await is_text_dominant(image_url): - print("检测到图片主要是文字内容,开始识别文字:") - return StreamingResponse(recognize_text(image_url), media_type="text/plain") + if await is_text_dominant(client, image_url): + logger.info("检测到图片主要是文字内容,开始识别文字:") + return StreamingResponse(recognize_text(client, image_url), media_type="text/plain") else: print("检测到图片主要是物体/场景,开始识别内容:") - return StreamingResponse(recognize_content(image_url), media_type="text/plain") + return StreamingResponse(recognize_content(client, image_url), media_type="text/plain") except Exception as e: raise HTTPException(status_code=500, detail=str(e)) diff --git a/AI/WxMini/Utils/ImageUtil.py b/AI/WxMini/Utils/ImageUtil.py new file mode 100644 index 00000000..1675b2ad --- /dev/null +++ b/AI/WxMini/Utils/ImageUtil.py @@ -0,0 +1,83 @@ +import time +import re + +async def is_text_dominant(client,image_url): + """ + 判断图片是否主要是文字内容 + :param image_url: 图片 URL + :return: True(主要是文字) / False(主要是物体/场景) + """ + completion = await client.chat.completions.create( + model="qwen-vl-ocr", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": image_url, + "min_pixels": 28 * 28 * 4, + "max_pixels": 28 * 28 * 1280 + }, + {"type": "text", "text": "Read all the text in the image."}, + ] + } + ], + stream=False + ) + text = completion.choices[0].message.content + + # 判断是否只有英文和数字 + if re.match(r'^[A-Za-z0-9\s]+$', text): + print("识别到的内容只有英文和数字,可能是无意义的字符,调用识别内容功能。") + return False + return True + + +async def recognize_text(client,image_url): + """ + 识别图片中的文字,流式输出 + """ + completion = await client.chat.completions.create( + model="qwen-vl-ocr", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": image_url, + "min_pixels": 28 * 28 * 4, + "max_pixels": 28 * 28 * 1280 + }, + {"type": "text", "text": "Read all the text in the image."}, + ] + } + ], + stream=True + ) + async for chunk in completion: + if chunk.choices[0].delta.content is not None: + for char in chunk.choices[0].delta.content: + if char != ' ': + yield char + time.sleep(0.1) + + +async def recognize_content(client,image_url): + """ + 识别图片中的内容,流式输出 + """ + completion = await client.chat.completions.create( + model="qwen-vl-plus", + messages=[{"role": "user", "content": [ + {"type": "text", "text": "这是什么"}, + {"type": "image_url", "image_url": {"url": image_url}} + ]}], + stream=True + ) + async for chunk in completion: + if chunk.choices[0].delta.content is not None: + for char in chunk.choices[0].delta.content: + yield char + time.sleep(0.1) \ No newline at end of file