# QingLong/AI/WxMini/Utils/ImageUtil.py

import asyncio
import time

from openai import OpenAI, AsyncOpenAI

from WxMini.Milvus.Config.MulvusConfig import MODELSCOPE_ACCESS_TOKEN
from WxMini.Utils.MySQLUtil import save_chat_to_mysql


async def recognize_text(client, pool, person_id, image_url):
    """
    Stream the text recognised in an image and record the result in MySQL.

    This is an async generator: it yields the model output one character at a
    time (space characters are dropped) and, once the stream is exhausted,
    stores the concatenated text through ``save_chat_to_mysql``.

    :param client: AsyncOpenAI-compatible client used to send the request
    :param pool: database connection pool handed to ``save_chat_to_mysql``
    :param person_id: user ID the record is stored under
    :param image_url: URL of the image to read
    :return: async generator yielding the recognised characters one by one
    """
    completion = await client.chat.completions.create(
        model="qwen-vl-ocr",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": image_url,
                        # Lower / upper pixel bounds sent along with the image.
                        "min_pixels": 28 * 28 * 4,
                        "max_pixels": 28 * 28 * 1280
                    },
                    {"type": "text", "text": "Read all the text in the image."},
                ]
            }
        ],
        stream=True
    )

    pieces = []  # characters emitted so far; joined once the stream ends
    async for chunk in completion:
        # A stream chunk may carry no choices at all (e.g. a usage-only
        # chunk); skip it instead of failing with IndexError.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if not delta_text:
            continue
        for char in delta_text:
            if char == ' ':
                continue  # spaces are deliberately dropped from the output
            yield char
            pieces.append(char)
            print(char, end='')
            # Pace the output without blocking the event loop; time.sleep()
            # here would stall every other coroutine for the whole stream.
            await asyncio.sleep(0.1)

    full_text = "".join(pieces)

    # Best-effort persistence: a storage failure is reported, not raised,
    # because the caller has already received the full stream.
    # NOTE(review): the meaning of the trailing positional arguments
    # ("", 0, 2, 2, 1) is defined by save_chat_to_mysql - confirm there.
    try:
        await save_chat_to_mysql(pool, person_id, f'{image_url}', full_text, "", 0, 2, 2, 1)
    except Exception as e:
        print(f"记录到数据库时出错：{e}")


async def recognize_content(client, pool, person_id, image_url):
    """
    Stream a description of what an image shows and record it in MySQL.

    This is an async generator: it yields the model output one character at a
    time (space characters are dropped) and, once the stream is exhausted,
    stores the concatenated text through ``save_chat_to_mysql``.

    :param client: AsyncOpenAI-compatible client used to send the request
    :param pool: database connection pool handed to ``save_chat_to_mysql``
    :param person_id: user ID the record is stored under
    :param image_url: URL of the image to describe
    :return: async generator yielding the answer characters one by one
    """
    completion = await client.chat.completions.create(
        model="qwen-vl-plus",
        messages=[{"role": "user", "content": [
            {"type": "text", "text": "这是什么"},
            {"type": "image_url", "image_url": {"url": image_url}}
        ]}],
        stream=True
    )

    pieces = []  # characters emitted so far; joined once the stream ends
    async for chunk in completion:
        # A stream chunk may carry no choices at all (e.g. a usage-only
        # chunk); skip it instead of failing with IndexError.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if not delta_text:
            continue
        for char in delta_text:
            if char == ' ':
                continue  # spaces are deliberately dropped from the output
            yield char
            pieces.append(char)
            print(char, end='')
            # Pace the output without blocking the event loop; time.sleep()
            # here would stall every other coroutine for the whole stream.
            await asyncio.sleep(0.1)

    full_text = "".join(pieces)

    # Best-effort persistence: a storage failure is reported, not raised,
    # because the caller has already received the full stream.
    # NOTE(review): the meaning of the trailing positional arguments
    # ("", 0, 2, 2, 2) is defined by save_chat_to_mysql - confirm there.
    try:
        await save_chat_to_mysql(pool, person_id, f'{image_url}', full_text, "", 0, 2, 2, 2)
    except Exception as e:
        print(f"记录到数据库时出错：{e}")


async def recognize_math(pool, person_id, image_url):
    """
    Stream a worked answer for the math problem in an image and record it.

    Unlike the other recognisers in this module, this function builds its own
    ``AsyncOpenAI`` client against the ModelScope inference endpoint. It is an
    async generator: it yields the model output one character at a time
    (space characters are dropped) and, once the stream is exhausted, stores
    the concatenated text through ``save_chat_to_mysql``.

    :param pool: database connection pool handed to ``save_chat_to_mysql``
    :param person_id: user ID the record is stored under
    :param image_url: URL of the image containing the problem
    :return: async generator yielding the answer characters one by one
    """
    client = AsyncOpenAI(
        api_key=MODELSCOPE_ACCESS_TOKEN,
        base_url="https://api-inference.modelscope.cn/v1"
    )

    # System prompt
    prompt = "You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step."

    completion = await client.chat.completions.create(
        # A larger alternative model is "Qwen/Qwen2.5-VL-72B-Instruct".
        model="Qwen/Qwen2.5-VL-32B-Instruct",
        messages=[
            {
                "role": "system",
                "content": [
                    {"type": "text", "text": prompt}
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": image_url}
                    },
                    {"type": "text", "text": "请使用中文回答：如何作答?"},
                ],
            }
        ],
        stream=True
    )

    pieces = []  # characters emitted so far; joined once the stream ends
    async for chunk in completion:
        # A stream chunk may carry no choices at all (e.g. a usage-only
        # chunk); skip it instead of failing with IndexError.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if not delta_text:
            continue
        for char in delta_text:
            if char == ' ':
                continue  # spaces are deliberately dropped from the output
            yield char
            pieces.append(char)
            print(char, end='')
            # Pace the output without blocking the event loop; time.sleep()
            # here would stall every other coroutine for the whole stream.
            await asyncio.sleep(0.1)

    full_text = "".join(pieces)

    # Best-effort persistence: a storage failure is reported, not raised,
    # because the caller has already received the full stream.
    # NOTE(review): the last argument is 1, the same value recognize_text
    # passes (recognize_content passes 2) - confirm this is intended for math.
    try:
        await save_chat_to_mysql(pool, person_id, f'{image_url}', full_text, "", 0, 2, 2, 1)
    except Exception as e:
        print(f"记录到数据库时出错：{e}")