Files
dsProject/dsLightRag/Test/Backup/Delete_P4_LLM_Text.py
2025-08-14 15:45:08 +08:00

80 lines
3.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import base64
import logging
from openai import OpenAI
from Config.Config import ALY_LLM_API_KEY, ALY_LLM_MODEL_NAME, ALY_LLM_BASE_URL
# Module-specific logger, named after this module.
logger = logging.getLogger(__name__)
def extract_text_from_image(image_path):
    """
    Extract the text content of an image (excluding text inside geometric figures).

    The image file is read, base64-encoded into a ``data:`` URL and sent,
    together with a fixed extraction prompt, to the "qvq-max" model through
    the OpenAI-compatible client. The reply is consumed as a stream.

    Args:
        image_path: Absolute path of the image file.

    Returns:
        str: The text streamed back in the ``content`` field of the reply.
        An empty string is returned if the file does not exist or any other
        error occurs (the error is logged, not raised).
    """
    try:
        # Read the image and convert it to base64.
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')
        # NOTE(review): the MIME type is always declared as PNG, whatever the
        # file actually is — confirm callers only pass PNG images.
        image_url = f"data:image/png;base64,{base64_image}"

        # Initialise the OpenAI-compatible client (QWen QVQ).
        client = OpenAI(
            api_key=ALY_LLM_API_KEY,
            base_url=ALY_LLM_BASE_URL
        )

        # Build the request following the QWen "messages" format.
        prompt = (
            "【任务】仅提取图像中的文字内容(排除几何图形中的文字),并用【】包裹结果。"
            "【要求】1. 不添加任何解释、分析或额外说明2. 不包含思考过程3. 仅返回提取的文本内容。"
            "【2】如上面提到的信息外其它信息一概不要输出\n"
        )
        completion = client.chat.completions.create(
            model="qvq-max",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": image_url}},
                    {"type": "text", "text": prompt}
                ]
            }],
            stream=True
        )

        # Consume the streamed response.
        answer_parts = []
        reasoning_parts = []
        for chunk in completion:
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            # The reasoning trace is kept apart from the answer: mixing it
            # into the return value would contradict the prompt ("no thinking
            # process") and the documented return value.
            reasoning = getattr(delta, 'reasoning_content', None)
            if reasoning:
                reasoning_parts.append(reasoning)
            content = getattr(delta, 'content', None)
            if content:
                answer_parts.append(content)

        if reasoning_parts:
            # Available for troubleshooting only; never part of the result.
            logger.debug("模型思考过程: %s", ''.join(reasoning_parts))
        logger.info("文字提取完成")
        return ''.join(answer_parts)
    except FileNotFoundError:
        logger.error("图片文件不存在: %s", image_path)
    except Exception as e:
        # Best-effort boundary: log with traceback and fall through to "".
        logger.exception("文字提取失败: %s", e)
    return ""
# Example usage
if __name__ == "__main__":
    # Logging is configured here for the demo run only; a real project
    # should configure it once in its main program.
    logging.basicConfig(level=logging.INFO)
    # Path of the sample image used for the test run.
    sample_image_path = r"D:\dsWork\dsProject\dsLightRag\Test\extracted\a62dce9d67c818accf94113aabefe172\1_1_TXT.png"
    # Run the extraction and print its result.
    print("提取结果:\n", extract_text_from_image(sample_image_path))