You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

110 lines
3.7 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import time
import re
from WxMini.Utils.MySQLUtil import save_chat_to_mysql
async def is_text_dominant(client, image_url):
"""
判断图片是否主要是文字内容
:param image_url: 图片 URL
:return: True主要是文字 / False主要是物体/场景)
"""
completion = await client.chat.completions.create(
model="qwen-vl-ocr",
messages=[
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": image_url,
"min_pixels": 28 * 28 * 4,
"max_pixels": 28 * 28 * 1280
},
{"type": "text", "text": "Read all the text in the image."},
]
}
],
stream=False
)
text = completion.choices[0].message.content
# 判断是否只有英文和数字
if re.match(r'^[A-Za-z0-9\s]+$', text):
print("识别到的内容只有英文和数字,可能是无意义的字符,调用识别内容功能。")
return False
return True
async def recognize_text(client, pool, person_id, image_url):
"""
识别图片中的文字,流式输出,并将结果记录到数据库
:param client: AsyncOpenAI 客户端
:param pool: 数据库连接池
:param person_id: 用户 ID
:param image_url: 图片 URL
:return: 最终拼接的字符串
"""
completion = await client.chat.completions.create(
model="qwen-vl-ocr",
messages=[
{
"role": "user",
"content": [
{
"type": "image_url",
"image_url": image_url,
"min_pixels": 28 * 28 * 4,
"max_pixels": 28 * 28 * 1280
},
{"type": "text", "text": "Read all the text in the image."},
]
}
],
stream=True
)
full_text = "" # 用于存储最终拼接的字符串
async for chunk in completion:
if chunk.choices[0].delta.content is not None:
for char in chunk.choices[0].delta.content:
if char != ' ':
yield char # 流式输出字符
full_text += char # 拼接字符
time.sleep(0.1) # 控制输出速度
# 记录到数据库
try:
await save_chat_to_mysql(pool, person_id, f'![]({image_url})', full_text, "", 0, 2, 2, 1)
except Exception as e:
print(f"记录到数据库时出错:{e}")
async def recognize_content(client, pool, person_id, image_url):
"""
识别图片中的内容,流式输出
"""
completion = await client.chat.completions.create(
model="qwen-vl-plus",
messages=[{"role": "user", "content": [
{"type": "text", "text": "这是什么"},
{"type": "image_url", "image_url": {"url": image_url}}
]}],
stream=True
)
full_text = "" # 用于存储最终拼接的字符串
async for chunk in completion:
if chunk.choices[0].delta.content is not None:
for char in chunk.choices[0].delta.content:
if char != ' ':
yield char # 流式输出字符
full_text += char # 拼接字符
time.sleep(0.1) # 控制输出速度
# 记录到数据库
try:
await save_chat_to_mysql(pool, person_id, f'![]({image_url})', full_text, "", 0, 2, 2, 2)
except Exception as e:
print(f"记录到数据库时出错:{e}")