80 lines
3.0 KiB
Python
80 lines
3.0 KiB
Python
import base64
|
||
import logging
|
||
from openai import OpenAI
|
||
|
||
from Config.Config import ALY_LLM_API_KEY, ALY_LLM_MODEL_NAME, ALY_LLM_BASE_URL
|
||
|
||
# 获取模块专属日志器
|
||
logger = logging.getLogger(__name__)
|
||
|
||
def extract_text_from_image(image_path):
    """
    Extract the text shown in an image using the ``qvq-max`` vision model.

    The image is read from disk, embedded in the request as a base64 data
    URL, and sent together with a prompt asking the model to return only the
    text in the image (excluding text inside geometric figures), wrapped in
    full-width brackets. The streamed answer is concatenated and returned
    as-is; the model's reasoning stream is discarded.

    Args:
        image_path: Path of the image file to read.

    Returns:
        str: The answer text produced by the model, or an empty string if
        the file does not exist or any other error occurs (errors are
        logged, never raised).
    """
    # Imported locally so this function stays self-contained with respect to
    # the file's top-level import block.
    import mimetypes

    try:
        # Read the image and encode it as base64 for a data URL.
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')

        # Label the payload with the MIME type guessed from the file name;
        # fall back to PNG when the guess is missing or is not an image type.
        mime_type, _ = mimetypes.guess_type(str(image_path))
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"
        image_url = f"data:{mime_type};base64,{base64_image}"

        # Create the OpenAI-compatible client with the configured endpoint.
        client = OpenAI(
            api_key=ALY_LLM_API_KEY,
            base_url=ALY_LLM_BASE_URL
        )

        # Build the instruction sent alongside the image.
        prompt = "【任务】仅提取图像中的文字内容(排除几何图形中的文字),并用【】包裹结果。"
        prompt += "【要求】1. 不添加任何解释、分析或额外说明;2. 不包含思考过程;3. 仅返回提取的文本内容。"
        prompt += "【2】如上面提到的信息外,其它信息一概不要输出!\n"

        # NOTE(review): the model name is hard-coded here rather than taken
        # from configuration — confirm this is intended.
        completion = client.chat.completions.create(
            model="qvq-max",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": image_url}},
                    {"type": "text", "text": prompt}
                ]
            }],
            stream=True
        )

        # Consume the streamed response, keeping only the answer text.
        extracted_text = []
        for chunk in completion:
            # Some chunks carry no choices; skip them.
            if not chunk.choices:
                continue

            delta = chunk.choices[0].delta
            # Only `content` is part of the answer. `reasoning_content` holds
            # the model's thinking process, which the prompt explicitly
            # excludes, so it must not be mixed into the result.
            content = getattr(delta, 'content', None)
            if content:
                extracted_text.append(content)

        logger.info("文字提取完成")
        return ''.join(extracted_text)

    except FileNotFoundError:
        logger.error(f"图片文件不存在: {image_path}")
    except Exception as e:
        # Best-effort contract: log the failure with its traceback and fall
        # through to the empty-string result instead of raising.
        logger.error(f"文字提取失败: {str(e)}", exc_info=True)
    return ""
|
||
|
||
|
||
# Example usage
if __name__ == "__main__":
    # Logging is configured here only for this demo run; a real project
    # should configure it once in the main program.
    logging.basicConfig(level=logging.INFO)

    # Sample image to run the extraction against.
    sample_image_path = r"D:\dsWork\dsProject\dsLightRag\Test\extracted\a62dce9d67c818accf94113aabefe172\1_1_TXT.png"

    # Run the extraction and show what came back.
    extraction_result = extract_text_from_image(sample_image_path)
    print("提取结果:\n", extraction_result)
|