Files
dsProject/dsLightRag/Test/Backup/Delete_P4_GLM_Text.py
2025-08-14 15:45:08 +08:00

86 lines
3.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import base64
import json
import logging
import requests
from Config.Config import GLM_API_KEY, GLM_MODEL_NAME, GLM_BASE_URL
# Module-level logger, named after this module via __name__
logger = logging.getLogger(__name__)
def _sse_data_payload(raw_line: bytes):
    """Return the payload of an SSE ``data:`` line, or ``None`` for other lines.

    Args:
        raw_line: One raw line as yielded by ``response.iter_lines()``.

    Returns:
        The text following the ``data:`` field name with surrounding
        whitespace removed, or ``None`` when the line is not a ``data:``
        field (for example an SSE comment or an ``event:`` line).
    """
    line = raw_line.decode("utf-8").strip()
    prefix = "data:"
    if not line.startswith(prefix):
        return None
    return line[len(prefix):].strip()


def extract_text_from_image(image_path: str) -> str:
    """Extract the text of a math problem from an image (text inside figures excluded).

    The image file is read, base64-encoded into a data URL and posted to the
    endpoint configured as ``GLM_BASE_URL`` together with a prompt asking the
    model to return only the problem text. The reply is consumed as a
    streamed response and every ``delta.content`` fragment is concatenated in
    arrival order.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The extracted text. An empty string is returned when the file does
        not exist or when any other error occurs (HTTP error status, network
        failure, timeout, ...); the error is logged instead of being raised.
    """
    try:
        # Read the image and embed it as a base64 data URL.
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode("utf-8")
        # NOTE(review): the MIME type is hard-coded, so a non-PNG input is
        # still labelled image/png in the data URL.
        image_url = f"data:image/png;base64,{base64_image}"

        # Build the prompt: task, domain terms to recognise, output format.
        prompt = (
            "【任务】提取图像中的数学题文字内容(排除几何图形),用【】包裹结果。"
            "【专业术语】需准确识别:将军饮马、动点、等量代换、两点之间线段最短、垂线段最短"
            "【格式要求】1. 术语错误需修正2. 保证句子语法通顺3. 不添加解释4. 不包含思考过程"
        )
        headers = {
            "Authorization": f"Bearer {GLM_API_KEY}",
            "Content-Type": "application/json",
        }
        data = {
            "model": GLM_MODEL_NAME,
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }],
            "stream": True,
        }

        # Send the request and consume the streamed response.
        extracted_text = []
        # (connect, read) timeouts so a stalled connection cannot block
        # forever; the read timeout applies between received bytes.
        with requests.post(
            GLM_BASE_URL,
            headers=headers,
            json=data,
            stream=True,
            timeout=(10, 120),
        ) as response:
            # Fail fast on 4xx/5xx instead of trying to parse an error body
            # as stream events; the outer handler logs it and returns "".
            response.raise_for_status()
            for chunk in response.iter_lines():
                if not chunk:
                    continue
                payload = _sse_data_payload(chunk)
                if payload is None:
                    # Not a data field (comment / keep-alive / other field).
                    continue
                # The terminator is checked AFTER the "data:" prefix has been
                # removed; checking the raw line would never match.
                if payload.startswith("[DONE]"):
                    logger.info("文字提取完成")
                    break
                try:
                    content = json.loads(payload)["choices"][0]["delta"].get("content")
                except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
                    # Best effort: skip a malformed event, keep the stream going.
                    logger.warning(f"解析响应块失败: {str(e)}")
                    continue
                # Fragments are appended unconditionally: de-duplicating them
                # would drop legitimately repeated pieces of text (punctuation,
                # letters, repeated words) and corrupt the result.
                if content:
                    extracted_text.append(content)
        # Join the fragments in arrival order.
        return "".join(extracted_text)
    except FileNotFoundError:
        logger.error(f"图片文件不存在: {image_path}")
    except Exception as e:
        # Top-level boundary: log with traceback and fall through to "".
        logger.error(f"文字提取失败: {str(e)}", exc_info=True)
    return ""
# Example usage: run this module directly to try the extractor on one image.
if __name__ == "__main__":
    # Logging is configured here only for the demo run; a real application
    # should configure it once in its main entry point.
    logging.basicConfig(level=logging.INFO)
    # Sample image to run the extraction on.
    sample_image_path = r"D:\dsWork\dsProject\dsLightRag\Test\extracted\a62dce9d67c818accf94113aabefe172\1_1_TXT.png"
    # Run the extraction and show the result.
    print("提取结果:\n", extract_text_from_image(sample_image_path))