# File-viewer metadata (scrape residue): 109 lines, 3.6 KiB, Python
import re
|
||
import time
|
||
from openai import OpenAI
|
||
from WxMini.Milvus.Config.MulvusConfig import *
|
||
|
||
# Shared OpenAI-SDK client pointed at DashScope's OpenAI-compatible endpoint.
# MODEL_API_KEY is presumably supplied by the wildcard config import above
# (verify against WxMini.Milvus.Config.MulvusConfig).
client = OpenAI(api_key=MODEL_API_KEY, base_url="https://dashscope.aliyuncs.com/compatible-mode/v1")
|
||
|
||
def is_text_dominant(image_url):
    """Decide whether an image is mainly text by running OCR on it.

    The image is sent to the ``qwen-vl-ocr`` model with a "read all the
    text" prompt, and the returned string is classified as follows:

    * no text at all (``None`` or empty string) -> not text-dominant;
    * only ASCII letters, digits and whitespace -> regarded as possibly
      meaningless characters, so not text-dominant;
    * anything else (for example Chinese characters or punctuation)
      -> text-dominant.

    :param image_url: URL of the image to classify.
    :return: True if the image is mainly text, False if it is mainly an
        object/scene.
    """
    completion = client.chat.completions.create(
        model="qwen-vl-ocr",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": image_url,
                        "min_pixels": 28 * 28 * 4,
                        "max_pixels": 28 * 28 * 1280,
                    },
                    {"type": "text", "text": "Read all the text in the image."},
                ],
            }
        ],
        stream=False,
    )
    text = completion.choices[0].message.content

    # Nothing was recognised: the image cannot be text-dominant. Without this
    # guard an empty string fails the `+` pattern below and would be reported
    # as text, and a None content would make re.match raise TypeError.
    if not text:
        return False

    # Output made up solely of ASCII letters, digits and whitespace is treated
    # as incidental characters, so the caller falls back to content recognition.
    if re.match(r'^[A-Za-z0-9\s]+$', text):
        print("识别到的内容只有英文和数字,可能是无意义的字符,调用识别内容功能。")
        return False
    return True
|
||
|
||
def recognize_text(image_url):
    """Run OCR on an image and stream the recognised text to stdout.

    The request goes to the ``qwen-vl-ocr`` model in streaming mode. Each
    received fragment is printed one character at a time, skipping plain
    space characters, with a 0.1 s pause after every printed character.

    :param image_url: URL of the image to read.
    :return: None; the text is only printed.
    """
    image_part = {
        "type": "image_url",
        "image_url": image_url,
        "min_pixels": 28 * 28 * 4,
        "max_pixels": 28 * 28 * 1280,
    }
    prompt_part = {"type": "text", "text": "Read all the text in the image."}

    stream = client.chat.completions.create(
        model="qwen-vl-ocr",
        messages=[{"role": "user", "content": [image_part, prompt_part]}],
        stream=True,
    )

    print("流式输出内容为:")
    for event in stream:
        fragment = event.choices[0].delta.content
        if fragment is None:
            # Chunks without text payload carry nothing to print.
            continue
        for ch in fragment:
            if ch == ' ':
                continue
            print(ch, end="", flush=True)
            # NOTE(review): pause is applied per printed character; confirm
            # this matches the intended pacing.
            time.sleep(0.1)
|
||
|
||
def recognize_content(image_url):
    """Ask the vision model what an image shows and stream the answer to stdout.

    The request goes to the ``qwen-vl-plus`` model in streaming mode. Each
    received fragment is printed one character at a time with a 0.1 s pause
    after every character.

    :param image_url: URL of the image to describe.
    :return: None; the answer is only printed.
    """
    question = {"type": "text", "text": "这是什么"}
    picture = {"type": "image_url", "image_url": {"url": image_url}}

    stream = client.chat.completions.create(
        model="qwen-vl-plus",
        messages=[{"role": "user", "content": [question, picture]}],
        stream=True,
    )

    print("流式输出结果:")
    for event in stream:
        fragment = event.choices[0].delta.content
        if fragment is None:
            # Chunks without text payload carry nothing to print.
            continue
        for ch in fragment:
            print(ch, end="", flush=True)
            time.sleep(0.1)
|
||
|
||
def process_image(image_url):
    """Route an image to text recognition or content recognition.

    Calls ``is_text_dominant`` first; text-dominant images are handed to
    ``recognize_text``, everything else to ``recognize_content``.

    :param image_url: URL of the image to process.
    :return: None; results are printed by the called functions.
    """
    if not is_text_dominant(image_url):
        print("检测到图片主要是物体/场景,开始识别内容:")
        recognize_content(image_url)
        return

    print("检测到图片主要是文字内容,开始识别文字:")
    recognize_text(image_url)
|
||
|
||
# Example invocation.
# NOTE(review): this runs at import time and performs network calls; consider
# moving it behind an `if __name__ == "__main__":` guard.
# Alternative sample image:
# image_url = "https://ylt.oss-cn-hangzhou.aliyuncs.com/Temp/james.png"
image_url = "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg"

process_image(image_url)