87 lines
3.5 KiB
Python
87 lines
3.5 KiB
Python
import base64
|
||
import json
|
||
import logging
|
||
import requests
|
||
from Config.Config import GLM_API_KEY, GLM_MODEL_NAME, GLM_BASE_URL
|
||
|
||
# Module-level logger, named after this module so its output can be filtered.
logger = logging.getLogger(__name__)


def _encode_image_as_data_url(image_path):
    """Read the file at *image_path* and return it as a base64 ``data:`` URL."""
    # Local import: stdlib only, avoids touching the file-level import block.
    import mimetypes

    with open(image_path, "rb") as image_file:
        encoded = base64.b64encode(image_file.read()).decode("utf-8")

    # Derive the MIME type from the file extension; fall back to PNG (the
    # previously hard-coded value) when the extension is unknown or not an image.
    mime_type, _ = mimetypes.guess_type(str(image_path))
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"
    return f"data:{mime_type};base64,{encoded}"


def _iter_stream_content(response):
    """Yield the text fragments from a streamed (SSE) chat-completion response."""
    for raw_line in response.iter_lines():
        if not raw_line:
            # Blank lines only separate events in the stream.
            continue

        line = raw_line.decode("utf-8").strip()
        if line.startswith("data:"):
            payload = line[len("data:"):].strip()
        elif line.startswith("[DONE]"):
            # Tolerate a bare sentinel that arrives without the "data:" prefix.
            payload = line
        else:
            # Other stream fields (comments, "event:", "id:") carry no content.
            continue

        if payload.startswith("[DONE]"):
            logger.info("文字提取完成")
            return

        try:
            choices = json.loads(payload).get("choices") or []
            if not choices:
                # Some chunks carry no choices at all; nothing to extract.
                continue
            content = (choices[0].get("delta") or {}).get("content")
        except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
            # A single malformed chunk must not abort the whole extraction.
            logger.warning("解析响应块失败: %s", e)
            continue

        if content:
            yield content


def extract_text_from_image(image_path, timeout=120.0):
    """
    Extract the text content of an image (excluding text inside geometric figures).

    The image is sent, base64-encoded, to the configured GLM model through a
    streaming chat-completion request, and the streamed fragments are joined
    in arrival order.

    Args:
        image_path: Absolute path of the image file.
        timeout: Timeout in seconds passed to ``requests.post`` (applies to
            connecting and to each wait for data, not to the whole stream).

    Returns:
        str: The extracted text, or an empty string if anything fails
        (missing file, HTTP error, timeout, ...). Failures are logged and
        never raised to the caller.
    """
    try:
        # Read the image and convert it to a base64 data URL.
        image_url = _encode_image_as_data_url(image_path)

        # Build the instruction: one task sentence plus numbered format rules.
        prompt = (
            "【任务】提取图像中的数学题文字内容(排除几何图形),用【】包裹结果。"
            "【格式】1. 术语错误需修正;2. 保证句子语法通顺;3. 不添加解释;4. 不包含思考过程"
        )
        headers = {
            "Authorization": f"Bearer {GLM_API_KEY}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": GLM_MODEL_NAME,
            "messages": [{
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }],
            "stream": True,
        }

        # Send the request and consume the streamed response.
        with requests.post(
            GLM_BASE_URL,
            headers=headers,
            json=payload,
            stream=True,
            timeout=timeout,
        ) as response:
            # Surface HTTP errors instead of trying to parse an error body.
            response.raise_for_status()
            # Fragments are joined as-is. They must NOT be de-duplicated:
            # streamed deltas legitimately repeat (punctuation, digits, common
            # characters), and dropping repeats corrupts the text.
            return "".join(_iter_stream_content(response))

    except FileNotFoundError:
        logger.error("图片文件不存在: %s", image_path)
    except Exception as e:
        # Boundary handler: by contract the caller gets "" on any failure.
        logger.error("文字提取失败: %s", e, exc_info=True)
    return ""
|
||
|
||
|
||
# Example usage
if __name__ == "__main__":
    # Logging is configured here only for this stand-alone demo; a real
    # project should configure it once in its main program.
    logging.basicConfig(level=logging.INFO)

    # Sample image used for the demo run.
    sample_image = r'D:\dsWork\dsProject\dsLightRag\Test\extracted\a62dce9d67c818accf94113aabefe172\1_1_TXT.png'

    # Run the extraction and print the result.
    extracted = extract_text_from_image(sample_image)
    print("提取结果:\n", extracted)
|