import asyncio
import time

import requests
from openai import OpenAI, AsyncOpenAI

from WxMini.Milvus.Config.MulvusConfig import MODELSCOPE_ACCESS_TOKEN
from WxMini.Utils.MySQLUtil import save_chat_to_mysql

# Pause between streamed characters, used to pace the output.
_CHAR_DELAY_SECONDS = 0.1
# Size reported when the image dimensions cannot be determined.
_DEFAULT_IMAGE_SIZE = (182, 182)
# Upper bound (seconds) for the image-info HTTP request so it cannot hang forever.
_IMAGE_INFO_TIMEOUT = 10


async def _stream_and_record(completion, pool, person_id, image_url, record_code, echo=False):
    """
    Relay a streamed chat completion character by character, then persist it.

    Shared implementation behind recognize_text / recognize_content /
    recognize_math.

    :param completion: async-iterable stream returned by
        ``client.chat.completions.create(..., stream=True)``
    :param pool: database connection pool, passed through to save_chat_to_mysql
    :param person_id: user ID, passed through to save_chat_to_mysql
    :param image_url: URL of the recognized image
    :param record_code: integer forwarded as the 9th positional argument of
        save_chat_to_mysql (NOTE(review): presumably a record sub-type; confirm
        against save_chat_to_mysql)
    :param echo: when True, also print every yielded character to stdout
    :return: async generator yielding single characters; space characters
        (' ') are dropped
    """
    pieces = []
    async for chunk in completion:
        # Guard against chunks with an empty ``choices`` list, which would
        # otherwise raise IndexError on ``choices[0]``.
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content is None:
            continue
        for char in content:
            if char == ' ':
                continue
            yield char
            pieces.append(char)
            if echo:
                print(char, end='')
            # Non-blocking pause: time.sleep() here would freeze every other
            # coroutine running on the same event loop.
            await asyncio.sleep(_CHAR_DELAY_SECONDS)

    full_text = "".join(pieces)

    # getImgWidthHeight performs a blocking HTTP request, so run it in the
    # default executor instead of on the event loop thread.
    loop = asyncio.get_running_loop()
    image_width, image_height = await loop.run_in_executor(None, getImgWidthHeight, image_url)

    # Best-effort persistence: a database failure is reported but must not
    # break the stream that has already been delivered to the caller.
    try:
        await save_chat_to_mysql(pool, person_id, f'{image_url}', full_text, "", 0, 2, 2, record_code,
                                 image_width, image_height)
    except Exception as e:
        print(f"记录到数据库时出错:{e}")


async def recognize_text(client, pool, person_id, image_url):
    """
    Recognize the text in an image (OCR), stream it, and record it in the database.

    :param client: AsyncOpenAI client
    :param pool: database connection pool
    :param person_id: user ID
    :param image_url: image URL
    :return: async generator yielding the recognized text one character at a
        time (spaces removed); the full text is saved once the stream ends
    """
    completion = await client.chat.completions.create(
        model="qwen-vl-ocr",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": image_url,
                        "min_pixels": 28 * 28 * 4,
                        "max_pixels": 28 * 28 * 1280
                    },
                    {"type": "text", "text": "Read all the text in the image."},
                ]
            }
        ],
        stream=True
    )
    async for char in _stream_and_record(completion, pool, person_id, image_url, 1, echo=True):
        yield char


async def recognize_content(client, pool, person_id, image_url):
    """
    Describe the content of an image, stream the answer, and record it in the database.

    :param client: AsyncOpenAI client
    :param pool: database connection pool
    :param person_id: user ID
    :param image_url: image URL
    :return: async generator yielding the answer one character at a time
        (spaces removed); the full text is saved once the stream ends
    """
    completion = await client.chat.completions.create(
        model="qwen-vl-plus",
        messages=[{"role": "user", "content": [
            {"type": "text", "text": "这是什么"},
            {"type": "image_url", "image_url": {"url": image_url}}
        ]}],
        stream=True
    )
    async for char in _stream_and_record(completion, pool, person_id, image_url, 2):
        yield char


async def recognize_math(pool, person_id, image_url):
    """
    Solve the math problem shown in an image, stream the answer, and record it.

    Unlike the other recognizers, this one builds its own AsyncOpenAI client
    against the ModelScope inference endpoint.

    :param pool: database connection pool
    :param person_id: user ID
    :param image_url: image URL
    :return: async generator yielding the answer one character at a time
        (spaces removed); the full text is saved once the stream ends
    """
    # System prompt
    prompt = "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."

    # The client is created per call, so close it when the stream is finished
    # to release its HTTP connections.
    async with AsyncOpenAI(
        api_key=MODELSCOPE_ACCESS_TOKEN,
        base_url="https://api-inference.modelscope.cn/v1"
    ) as client:
        completion = await client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-32B-Instruct",
            messages=[
                {
                    "role": "system",
                    "content": [
                        {"type": "text", "text": prompt}
                    ],
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": image_url}
                        },
                        {"type": "text", "text": "请使用中文回答:如何作答?"},
                    ],
                }
            ],
            stream=True
        )
        # NOTE(review): record code 1 is the same value recognize_text stores;
        # confirm whether math answers should use a distinct code.
        async for char in _stream_and_record(completion, pool, person_id, image_url, 1, echo=True):
            yield char


def getImgWidthHeight(img_url):
    """
    Fetch the width and height of an image via the ``x-oss-process=image/info`` query.

    :param img_url: image URL; the info query parameter is appended to it
    :return: ``(width, height)`` as ints, or ``(182, 182)`` when the request
        fails or the response lacks the expected fields
    """
    try:
        base_url = str(img_url)
        # Use '&' when the URL already carries a query string; a second '?'
        # would corrupt the existing parameters.
        separator = '&' if '?' in base_url else '?'
        url = f'{base_url}{separator}x-oss-process=image/info'
        response = requests.get(url, timeout=_IMAGE_INFO_TIMEOUT)
        jo = response.json()
        width = int(jo['ImageWidth']['value'])
        height = int(jo['ImageHeight']['value'])
        return width, height
    except Exception:
        # Deliberate best-effort fallback; ``except Exception`` (rather than a
        # bare ``except``) lets KeyboardInterrupt / SystemExit propagate.
        return _DEFAULT_IMAGE_SIZE