Files
dsProject/dsLightRag/Test/Backup/Delete_P4_LLM_Text.py

80 lines
3.0 KiB
Python
Raw Normal View History

2025-08-14 15:45:08 +08:00
import base64
import logging
from openai import OpenAI
from Config.Config import ALY_LLM_API_KEY, ALY_LLM_MODEL_NAME, ALY_LLM_BASE_URL
# Module-specific logger
logger = logging.getLogger(__name__)
def extract_text_from_image(image_path):
    """
    Extract the text shown in an image, excluding text inside geometric figures.

    The image file is read from disk, embedded in a base64 ``data:`` URL and
    sent together with a fixed Chinese prompt to the ``qvq-max`` model through
    the OpenAI-compatible client. The streamed answer is concatenated and
    returned.

    Args:
        image_path: Path of the image file to read.

    Returns:
        str: The text returned by the model (the prompt asks the model to wrap
        it in 【】). An empty string is returned when the file does not exist
        or any other error occurs; errors are logged, never raised.
    """
    # Function-scope import so the block brings its own dependency into scope.
    import mimetypes

    try:
        # Read the image and encode it as base64.
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode("utf-8")

        # Use the MIME type matching the file extension instead of always
        # claiming PNG; fall back to PNG when the type cannot be determined.
        mime_type, _ = mimetypes.guess_type(str(image_path))
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"
        image_url = f"data:{mime_type};base64,{base64_image}"

        # OpenAI-compatible client configured with the key and base URL
        # imported from the project config.
        client = OpenAI(
            api_key=ALY_LLM_API_KEY,
            base_url=ALY_LLM_BASE_URL,
        )

        # Prompt text is sent to the model verbatim.
        prompt = (
            "【任务】仅提取图像中的文字内容(排除几何图形中的文字),并用【】包裹结果。"
            "【要求】1. 不添加任何解释、分析或额外说明2. 不包含思考过程3. 仅返回提取的文本内容。"
            "【2】如上面提到的信息外其它信息一概不要输出\n"
        )

        # The model name is fixed here; ALY_LLM_MODEL_NAME is not used by this
        # function.
        completion = client.chat.completions.create(
            model="qvq-max",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": image_url}},
                    {"type": "text", "text": prompt},
                ],
            }],
            stream=True,
        )

        # Consume the streamed response. Only the answer text (`content`) is
        # kept; `reasoning_content` is deliberately ignored so the model's
        # thinking trace does not leak into the extracted text.
        extracted_text = []
        for chunk in completion:
            if not chunk.choices:
                continue
            content = getattr(chunk.choices[0].delta, "content", None)
            if content:
                extracted_text.append(content)

        logger.info("文字提取完成")
        return "".join(extracted_text)
    except FileNotFoundError:
        logger.error("图片文件不存在: %s", image_path)
    except Exception as e:
        # Best-effort API: log the full traceback and fall through to the
        # empty-string result instead of propagating the error.
        logger.error("文字提取失败: %s", e, exc_info=True)
    return ""
# Example usage
if __name__ == "__main__":
    # Logging is configured here only for this standalone run; a real project
    # should configure it once in its main program.
    logging.basicConfig(level=logging.INFO)

    # Sample image used for the manual test
    sample_image = r"D:\dsWork\dsProject\dsLightRag\Test\extracted\a62dce9d67c818accf94113aabefe172\1_1_TXT.png"

    # Run the extraction and print whatever came back
    print("提取结果:\n", extract_text_from_image(sample_image))