Files
dsProject/dsLightRag/Test/Backup/Delete_P4_GLM_Text.py

86 lines
3.4 KiB
Python
Raw Normal View History

2025-08-14 15:45:08 +08:00
import base64
import json
import logging
import requests
from Config.Config import GLM_API_KEY, GLM_MODEL_NAME, GLM_BASE_URL
# Module-specific logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def _parse_sse_line(line):
    """Interpret one decoded line of the streaming (SSE) chat response.

    Args:
        line: A non-empty, already decoded line of the response body.

    Returns:
        tuple: ``(done, content)``. ``done`` is True when the line carries the
        ``[DONE]`` end-of-stream sentinel. ``content`` is the text fragment
        read from ``choices[0]["delta"]["content"]``, or ``""`` when the line
        contributes no text.

    Raises:
        ValueError: The ``data:`` payload is not valid JSON.
        KeyError, IndexError, TypeError, AttributeError: The decoded JSON does
            not have the ``choices[0]["delta"]`` shape read here.
    """
    prefix = "data:"
    if not line.startswith(prefix):
        # Only "data:" fields are parsed; any other line is ignored instead
        # of being blindly sliced and fed to the JSON parser.
        return False, ""
    payload = line[len(prefix):].strip()
    if payload == "[DONE]":
        # The sentinel arrives as "data: [DONE]", so it can only be detected
        # after the prefix has been removed.
        return True, ""
    choices = json.loads(payload)["choices"]
    if not choices:
        # A chunk with an empty choices list carries no text fragment.
        return False, ""
    return False, choices[0]["delta"].get("content") or ""


def extract_text_from_image(image_path, timeout=(10, 120)) -> str:
    """Extract the text of a math problem from an image via the GLM model.

    The image is read from disk, base64-encoded into a ``data:`` URL and sent
    together with a fixed prompt (asking for the text outside geometric
    figures) to ``GLM_BASE_URL``. The streamed answer fragments are
    concatenated in arrival order.

    Args:
        image_path: Path of the image file to read.
        timeout: Timeout passed to ``requests.post``; a
            ``(connect, read)`` tuple or a single number of seconds.

    Returns:
        str: The concatenated text fragments, or an empty string if the file
        does not exist or any other error occurs (the error is logged, not
        raised).
    """
    try:
        # Read the image and embed it as a base64 data URL.
        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode("utf-8")
        image_url = f"data:image/png;base64,{base64_image}"

        # Build the prompt: task, domain terms to recognise, format rules.
        prompt = (
            "【任务】提取图像中的数学题文字内容(排除几何图形),用【】包裹结果。"
            "【专业术语】需准确识别:将军饮马、动点、等量代换、两点之间线段最短、垂线段最短"
            "【格式要求】1. 术语错误需修正2. 保证句子语法通顺3. 不添加解释4. 不包含思考过程"
        )
        headers = {
            "Authorization": f"Bearer {GLM_API_KEY}",
            "Content-Type": "application/json",
        }
        data = {
            "model": GLM_MODEL_NAME,
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }],
            "stream": True,
        }

        extracted_text = []
        with requests.post(
            GLM_BASE_URL,
            headers=headers,
            json=data,
            stream=True,
            timeout=timeout,  # without it a stalled connection blocks forever
        ) as response:
            # Fail fast on HTTP errors instead of trying to parse the error
            # body as a stream; the outer handler logs it and returns "".
            response.raise_for_status()
            for chunk in response.iter_lines():
                if not chunk:
                    continue
                try:
                    done, content = _parse_sse_line(chunk.decode("utf-8"))
                except (ValueError, KeyError, IndexError,
                        TypeError, AttributeError) as e:
                    # Best effort: one malformed chunk must not discard the
                    # fragments collected so far.
                    logger.warning(f"解析响应块失败: {str(e)}")
                    continue
                if done:
                    logger.info("文字提取完成")
                    break
                if content:
                    # Append every fragment as-is. Identical fragments
                    # (repeated characters, punctuation, words) legitimately
                    # recur in a stream, so they must not be de-duplicated.
                    extracted_text.append(content)
        return "".join(extracted_text)
    except FileNotFoundError:
        logger.error(f"图片文件不存在: {image_path}")
    except Exception as e:
        logger.error(f"文字提取失败: {str(e)}", exc_info=True)
    return ""
# Example usage when the module is run as a script.
if __name__ == "__main__":
    # Logging is configured here only for this demo; a real application
    # should configure it once in its main program.
    logging.basicConfig(level=logging.INFO)
    # Sample image to run the extraction on.
    sample_image_path = r"D:\dsWork\dsProject\dsLightRag\Test\extracted\a62dce9d67c818accf94113aabefe172\1_1_TXT.png"
    # Run the extraction and print whatever text came back.
    extracted = extract_text_from_image(sample_image_path)
    print("提取结果:\n", extracted)