From 3f8ea34c24ba308006118b96ea9bd6a647774076 Mon Sep 17 00:00:00 2001 From: HuangHai <10402852@qq.com> Date: Thu, 27 Mar 2025 09:32:48 +0800 Subject: [PATCH] 'commit' --- AI/WxMini/Start.py | 118 ++++++++++++++++-- AI/WxMini/Test/OCR1_Image_Recognition.py | 16 ++- .../Test/OCR1_Image_Recognition_Stream.py | 26 ---- AI/WxMini/Test/TestAutoSelect.py | 109 ++++++++++++++++ AI/WxMini/Test/X1_ReadImage.py | 45 +++---- 5 files changed, 248 insertions(+), 66 deletions(-) delete mode 100644 AI/WxMini/Test/OCR1_Image_Recognition_Stream.py create mode 100644 AI/WxMini/Test/TestAutoSelect.py diff --git a/AI/WxMini/Start.py b/AI/WxMini/Start.py index 9262a160..ca10724f 100644 --- a/AI/WxMini/Start.py +++ b/AI/WxMini/Start.py @@ -1,5 +1,6 @@ import asyncio import logging +import re import time import uuid from contextlib import asynccontextmanager @@ -11,6 +12,7 @@ from fastapi.security import OAuth2PasswordBearer from jose import JWTError, jwt from openai import AsyncOpenAI from passlib.context import CryptContext +from starlette.responses import StreamingResponse from WxMini.Milvus.Config.MulvusConfig import * from WxMini.Milvus.Utils.MilvusCollectionManager import MilvusCollectionManager @@ -267,7 +269,7 @@ async def reply(person_id: str = Form(...), # 查询非向量字段 record = await asyncio.to_thread(collection_manager.query_by_id, hit.id) if record: - #logger.info(f"查询到的记录: {record}") + # logger.info(f"查询到的记录: {record}") # 添加历史交互 history_prompt += f"用户: {record['user_input']}\n大模型: {record['model_response']}\n" except Exception as e: @@ -275,7 +277,7 @@ async def reply(person_id: str = Form(...), # 限制历史交互提示词长度 history_prompt = history_prompt[:2000] - #logger.info(f"历史交互提示词: {history_prompt}") + # logger.info(f"历史交互提示词: {history_prompt}") # 调用大模型,将历史交互作为提示词 try: @@ -314,7 +316,7 @@ async def reply(person_id: str = Form(...), if len(result) > 500: logger.warning(f"大模型回复被截断,原始长度: {len(result)}") await asyncio.to_thread(collection_manager.insert_data, entities) - #logger.info("用户输入和大模型反馈已记录到向量数据库。") + # logger.info("用户输入和大模型反馈已记录到向量数据库。") # 调用 TTS 生成 MP3 uuid_str = str(uuid.uuid4()) @@ -327,7 +329,7 @@ async def reply(person_id: str = Form(...), t = TTS(None) # 传入 None 表示不保存到本地文件 audio_data, duration = await asyncio.to_thread(t.generate_audio, result) # 假设 TTS 类有一个 generate_audio 方法返回音频数据 - #print(f"音频时长: {duration} 秒") + # print(f"音频时长: {duration} 秒") # 将音频数据直接上传到 OSS await asyncio.to_thread(upload_mp3_to_oss_from_memory, tts_file, audio_data) @@ -338,7 +340,7 @@ async def reply(person_id: str = Form(...), # 记录聊天数据到 MySQL await save_chat_to_mysql(app.state.mysql_pool, person_id, prompt, result, url, duration) - #logger.info("用户输入和大模型反馈已记录到 MySQL 数据库。") + # logger.info("用户输入和大模型反馈已记录到 MySQL 数据库。") # 调用会话检查机制,异步执行 asyncio.create_task(on_session_end(person_id)) @@ -402,7 +404,8 @@ async def get_risk_chat_logs( offset = (page - 1) * page_size # 调用 get_chat_logs_by_risk_flag 方法 - logs, total = await get_chat_logs_by_risk_flag(app.state.mysql_pool, risk_flag,current_user["person_id"], offset, page_size) + logs, total = await get_chat_logs_by_risk_flag(app.state.mysql_pool, risk_flag, current_user["person_id"], offset, + page_size) if not logs: return { "success": False, @@ -425,7 +428,6 @@ async def get_risk_chat_logs( } - # 获取风险统计接口 @app.get("/aichat/chat_logs_summary") async def chat_logs_summary( @@ -484,6 +486,7 @@ async def chat_logs_summary( } } + # 获取上传OSS的授权Token @app.get("/aichat/get_oss_upload_token") async def get_oss_upload_token(current_user: dict = Depends(get_current_user)): @@ -504,6 +507,107 @@ async def get_oss_upload_token(current_user: dict = Depends(get_current_user)): } +async def is_text_dominant(image_url): + """ + 判断图片是否主要是文字内容 + :param image_url: 图片 URL + :return: True(主要是文字) / False(主要是物体/场景) + """ + completion = await client.chat.completions.create( + model="qwen-vl-ocr", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": image_url, + "min_pixels": 28 * 28 * 4, + "max_pixels": 28 * 28 * 1280 + }, + {"type": "text", "text": "Read all the text in the image."}, + ] + } + ], + stream=False + ) + text = completion.choices[0].message.content + + # 判断是否只有英文和数字 + if re.match(r'^[A-Za-z0-9\s]+$', text): + print("识别到的内容只有英文和数字,可能是无意义的字符,调用识别内容功能。") + return False + return True + + +async def recognize_text(image_url): + """ + 识别图片中的文字,流式输出 + """ + completion = await client.chat.completions.create( + model="qwen-vl-ocr", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": image_url, + "min_pixels": 28 * 28 * 4, + "max_pixels": 28 * 28 * 1280 + }, + {"type": "text", "text": "Read all the text in the image."}, + ] + } + ], + stream=True + ) + async for chunk in completion: + if chunk.choices[0].delta.content is not None: + for char in chunk.choices[0].delta.content: + if char != ' ': + yield char + time.sleep(0.1) + + +async def recognize_content(image_url): + """ + 识别图片中的内容,流式输出 + """ + completion = await client.chat.completions.create( + model="qwen-vl-plus", + messages=[{"role": "user", "content": [ + {"type": "text", "text": "这是什么"}, + {"type": "image_url", "image_url": {"url": image_url}} + ]}], + stream=True + ) + async for chunk in completion: + if chunk.choices[0].delta.content is not None: + for char in chunk.choices[0].delta.content: + yield char + time.sleep(0.1) + + +@app.get("/aichat/process_image") +async def process_image(image_url: str, current_user: dict = Depends(get_current_user)): + logger.info(f"current_user:{current_user['login_name']}") + """ + 处理图片,自动判断调用哪个功能 + :param image_url: 图片 URL + :return: 流式输出结果 + """ + try: + if await is_text_dominant(image_url): + print("检测到图片主要是文字内容,开始识别文字:") + return StreamingResponse(recognize_text(image_url), media_type="text/plain") + else: + print("检测到图片主要是物体/场景,开始识别内容:") + return StreamingResponse(recognize_content(image_url), media_type="text/plain") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + # 运行 FastAPI 应用 if __name__ == "__main__": import uvicorn diff --git a/AI/WxMini/Test/OCR1_Image_Recognition.py b/AI/WxMini/Test/OCR1_Image_Recognition.py index dd0e9b97..47f9d2f4 100644 --- a/AI/WxMini/Test/OCR1_Image_Recognition.py +++ b/AI/WxMini/Test/OCR1_Image_Recognition.py @@ -1,3 +1,4 @@ +import time from openai import OpenAI from WxMini.Milvus.Config.MulvusConfig import * @@ -5,6 +6,8 @@ client = OpenAI( api_key=MODEL_API_KEY, base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", ) +image_url = "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg" + completion = client.chat.completions.create( model="qwen-vl-ocr", messages=[ @@ -13,7 +16,7 @@ completion = client.chat.completions.create( "content": [ { "type": "image_url", - "image_url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg", + "image_url": image_url, "min_pixels": 28 * 28 * 4, "max_pixels": 28 * 28 * 1280 }, @@ -22,12 +25,17 @@ completion = client.chat.completions.create( ] } ], - stream=True) + stream=True +) + full_content = "" print("流式输出内容为:") for chunk in completion: if chunk.choices[0].delta.content is None: continue + # 遍历每个字符并逐个输出 + for char in chunk.choices[0].delta.content: + if char!=' ': + print(char, end="", flush=True) # 逐个输出字符,不换行 + time.sleep(0.1) # 控制输出速度 full_content += chunk.choices[0].delta.content - print(chunk.choices[0].delta.content) -print(f"完整内容为:{full_content}") \ No newline at end of file diff --git a/AI/WxMini/Test/OCR1_Image_Recognition_Stream.py b/AI/WxMini/Test/OCR1_Image_Recognition_Stream.py deleted file mode 100644 index 55c451a4..00000000 --- a/AI/WxMini/Test/OCR1_Image_Recognition_Stream.py +++ /dev/null @@ -1,26 +0,0 @@ -from openai import OpenAI -from WxMini.Milvus.Config.MulvusConfig import * - -client = OpenAI( - api_key=MODEL_API_KEY, - base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", -) -completion = client.chat.completions.create( - model="qwen-vl-ocr", - messages=[ - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg", - "min_pixels": 28 * 28 * 4, - "max_pixels": 28 * 28 * 1280 - }, - # 为保证识别效果,目前模型内部会统一使用"Read all the text in the image."进行识别,用户输入的文本不会生效。 - {"type": "text", "text": "Read all the text in the image."}, - ] - } - ]) - -print(completion.choices[0].message.content) \ No newline at end of file diff --git a/AI/WxMini/Test/TestAutoSelect.py b/AI/WxMini/Test/TestAutoSelect.py new file mode 100644 index 00000000..639abdd9 --- /dev/null +++ b/AI/WxMini/Test/TestAutoSelect.py @@ -0,0 +1,109 @@ +import re +import time +from openai import OpenAI +from WxMini.Milvus.Config.MulvusConfig import * + +client = OpenAI( + api_key=MODEL_API_KEY, + base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", +) + +def is_text_dominant(image_url): + """ + 判断图片是否主要是文字内容 + :param image_url: 图片 URL + :return: True(主要是文字) / False(主要是物体/场景) + """ + completion = client.chat.completions.create( + model="qwen-vl-ocr", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": image_url, + "min_pixels": 28 * 28 * 4, + "max_pixels": 28 * 28 * 1280 + }, + {"type": "text", "text": "Read all the text in the image."}, + ] + } + ], + stream=False + ) + text = completion.choices[0].message.content + # 判断条件 + # 1、有汉字出现就是文字 + # 2、如果是英文,但是识别出来的内容只有英文,认为是文字 + # 判断是否只有英文和数字 + if re.match(r'^[A-Za-z0-9\s]+$', text): + print("识别到的内容只有英文和数字,可能是无意义的字符,调用识别内容功能。") + return False + return True + +def recognize_text(image_url): + """ + 识别图片中的文字 + """ + completion = client.chat.completions.create( + model="qwen-vl-ocr", + messages=[ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": image_url, + "min_pixels": 28 * 28 * 4, + "max_pixels": 28 * 28 * 1280 + }, + {"type": "text", "text": "Read all the text in the image."}, + ] + } + ], + stream=True + ) + print("流式输出内容为:") + for chunk in completion: + if chunk.choices[0].delta.content is not None: + for char in chunk.choices[0].delta.content: + if char != ' ': + print(char, end="", flush=True) + time.sleep(0.1) + +def recognize_content(image_url): + """ + 识别图片中的内容 + """ + completion = client.chat.completions.create( + model="qwen-vl-plus", + messages=[{"role": "user", "content": [ + {"type": "text", "text": "这是什么"}, + {"type": "image_url", "image_url": {"url": image_url}} + ]}], + stream=True + ) + print("流式输出结果:") + for chunk in completion: + if chunk.choices[0].delta.content is not None: + for char in chunk.choices[0].delta.content: + print(char, end="", flush=True) + time.sleep(0.1) + +def process_image(image_url): + """ + 处理图片,自动判断调用哪个功能 + """ + if is_text_dominant(image_url): + print("检测到图片主要是文字内容,开始识别文字:") + recognize_text(image_url) + else: + print("检测到图片主要是物体/场景,开始识别内容:") + recognize_content(image_url) + +# 示例调用 +#image_url = "https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/james.png" +image_url = "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg" + +process_image(image_url) \ No newline at end of file diff --git a/AI/WxMini/Test/X1_ReadImage.py b/AI/WxMini/Test/X1_ReadImage.py index f9f88fd2..a4399ae7 100644 --- a/AI/WxMini/Test/X1_ReadImage.py +++ b/AI/WxMini/Test/X1_ReadImage.py @@ -1,4 +1,4 @@ -import json +import time # 导入 time 模块 from openai import OpenAI from WxMini.Milvus.Config.MulvusConfig import * @@ -6,37 +6,24 @@ client = OpenAI( api_key=MODEL_API_KEY, base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", ) -# 一盆花 -# photo_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/mudan.jpg' -# 狗与女主人 -# photo_url = "https://dashscope.oss-cn-beijing.aliyuncs.com/images/dog_and_girl.jpeg" - -# 哪吒 -# photo_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/nezha.jpg' - -# 锅包肉 -# photo_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/gbr.jpg' - -# 儿童看图说话 -# photo_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/xiaoxiong.jpg' - -# 詹姆斯与库里 -photo_url='https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/james.png' +# 图片 URL +image_url = 'https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/james.png' +# 创建流式请求 completion = client.chat.completions.create( - model="qwen-vl-plus", - # 此处以qwen-vl-plus为例,可按需更换模型名称。模型列表:https://help.aliyun.com/zh/model-studio/getting-started/models + model="qwen-vl-plus", # 使用 qwen-vl-plus 模型 messages=[{"role": "user", "content": [ {"type": "text", "text": "这是什么"}, - {"type": "image_url", - "image_url": {"url": photo_url}} - ]}] + {"type": "image_url", "image_url": {"url": image_url}} + ]}], + stream=True # 启用流式输出 ) -json_data = completion.model_dump_json() -# 解析 JSON 数据 -data = json.loads(json_data) -# 提取 content 内容 -content = data['choices'][0]['message']['content'] -# 打印 content -print(content) + +# 流式输出结果 +print("流式输出结果:") +for chunk in completion: + if chunk.choices[0].delta.content is not None: + for char in chunk.choices[0].delta.content: # 逐个字符输出 + print(char, end="", flush=True) # 逐个字符输出,不换行 + time.sleep(0.1) # 控制输出速度,延迟 0.1 秒 \ No newline at end of file