Files
dsProject/dsLightRag/Util/ShiTiRecognizer.py
2025-09-01 07:40:41 +08:00

192 lines
7.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import logging
from typing import Dict, Any
from alibabacloud_credentials.client import Client as CredentialClient
from alibabacloud_credentials.models import Config as CredentialConfig
from alibabacloud_ocr_api20210707 import models as OcrModels
from alibabacloud_ocr_api20210707.client import Client as OcrClient
from alibabacloud_tea_openapi import models as OpenApiModels
from alibabacloud_tea_util import models as UtilModels
from alibabacloud_tea_util.client import Client as UtilClient
from Config.Config import ALY_AK, ALY_SK
# Configure logging at import time (INFO level, timestamped records) and
# create the logger used throughout this module.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
class ShiTiRecognizer:
    """Wrapper around the Alibaba Cloud OCR exam-question recognition API.

    The SDK client is created lazily the first time ``client`` is accessed,
    so constructing a recognizer performs no SDK calls.
    """

    # Endpoint used when the caller does not supply one.
    DEFAULT_ENDPOINT = 'ocr-api.cn-hangzhou.aliyuncs.com'

    def __init__(self, access_key_id: str = None, access_key_secret: str = None, endpoint: str = None):
        """Store the credentials and endpoint used to build the OCR client.

        Args:
            access_key_id: Access key ID; falls back to ``ALY_AK`` when falsy.
            access_key_secret: Access key secret; falls back to ``ALY_SK``
                when falsy.
            endpoint: OCR service endpoint; falls back to
                ``DEFAULT_ENDPOINT`` when falsy.
        """
        self.access_key_id = access_key_id or ALY_AK
        self.access_key_secret = access_key_secret or ALY_SK
        self.endpoint = endpoint or self.DEFAULT_ENDPOINT
        self._client = None

    @property
    def client(self) -> "OcrClient":
        """Return the OCR client, creating and caching it on first access."""
        if self._client is None:
            self._client = self._create_client()
        return self._client

    def _create_client(self) -> "OcrClient":
        """Build an OCR client from the stored access key pair and endpoint.

        Raises:
            Exception: Any error raised while building the client is logged
                and re-raised unchanged.
        """
        try:
            credential_config = CredentialConfig(
                type='access_key',
                access_key_id=self.access_key_id,
                access_key_secret=self.access_key_secret
            )
            credential = CredentialClient(config=credential_config)
            config = OpenApiModels.Config(
                credential=credential,
                endpoint=self.endpoint
            )
            return OcrClient(config)
        except Exception as e:
            logger.error("创建OCR客户端失败: %s", e)
            raise

    def recognize_question(self, image_url: str) -> Dict[str, Any]:
        """Recognize the exam question contained in the image at ``image_url``.

        Args:
            image_url: URL of the image to recognize.

        Returns:
            The parsed response dictionary (see ``_parse_response``), or
            ``{"error": <message>}`` when the API call or parsing fails.

        Raises:
            ValueError: If ``image_url`` is empty.
        """
        if not image_url:
            raise ValueError("图片URL不能为空")
        logger.info("开始识别试题图片URL: %s", image_url)
        request = OcrModels.RecognizeEduQuestionOcrRequest()
        request.url = image_url
        runtime = UtilModels.RuntimeOptions()
        try:
            response = self.client.recognize_edu_question_ocr_with_options(request, runtime)
            result = self._parse_response(response)
        except Exception as error:
            logger.error("试题识别失败: %s", error)
            self._handle_error(error)
            return {"error": str(error)}
        logger.info("试题识别成功")
        return result

    @staticmethod
    def _decode_data(data: Any) -> Any:
        """Return ``data`` parsed as JSON when it is a JSON string.

        Non-string values, and strings that are not valid JSON, are returned
        unchanged (the latter after logging a warning).
        """
        if not isinstance(data, str):
            return data
        try:
            return json.loads(data)
        except json.JSONDecodeError:
            logger.warning("Data字段不是有效的JSON字符串")
            return data

    def _parse_response(self, response) -> Dict[str, Any]:
        """Convert an API response object into a plain dictionary.

        Args:
            response: Response object exposing a ``body`` attribute.

        Returns:
            * ``{"error": ...}`` when the response or its body is empty.
            * ``body.to_map()`` (with a string ``"Data"`` entry JSON-decoded)
              when the body provides ``to_map``.
            * Otherwise a dict with ``request_id``, ``code``, ``message`` and
              ``Data`` built from the body's attributes.
            * ``{"raw_data": str(body)}`` if anything above raises.
        """
        if not response or not response.body:
            return {"error": "API返回空响应"}
        try:
            body = response.body
            # Preferred path: let the body object serialize itself.
            if hasattr(body, 'to_map'):
                body_map = body.to_map()
                if "Data" in body_map:
                    body_map["Data"] = self._decode_data(body_map["Data"])
                return body_map
            # Fallback path: read the individual attributes.
            result = {
                "request_id": getattr(body, 'request_id', ''),
                "code": getattr(body, 'code', ''),
                "message": getattr(body, 'message', ''),
                "Data": None
            }
            data = getattr(body, 'data', None)
            if data:
                if isinstance(data, str):
                    # A string payload has no content/score/... attributes;
                    # decode it the same way the to_map() path does.
                    result["Data"] = self._decode_data(data)
                elif hasattr(data, 'to_map'):
                    result["Data"] = data.to_map()
                else:
                    result["Data"] = {
                        "content": getattr(data, 'content', ''),
                        "score": getattr(data, 'score', 0),
                        "question_info": getattr(data, 'question_info', ''),
                        "angle": getattr(data, 'angle', 0)
                    }
            return result
        except Exception as e:
            logger.warning("响应解析失败,返回原始数据: %s", e)
            return {"raw_data": str(response.body)}

    def _handle_error(self, error: Exception) -> None:
        """Log diagnostic details for a failed API call.

        This helper is diagnostics-only and must not raise: it runs inside the
        ``except`` handler of ``recognize_question``, and an exception here
        would prevent that method from returning its error dictionary.

        Args:
            error: The exception raised by the API call.
        """
        error_message = getattr(error, 'message', None)
        if not isinstance(error_message, str):
            error_message = str(error)
        logger.error("错误信息: %s", error_message)
        data = getattr(error, 'data', None)
        # Only mapping-like payloads can carry a "Recommend" hint.
        if data and callable(getattr(data, 'get', None)):
            recommend = data.get("Recommend")
            if recommend:
                logger.info("诊断建议: %s", recommend)
if __name__ == '__main__':
    # Demo entry point: recognize a fixed sample image and print the outcome.
    try:
        demo_url = "https://dsideal.obs.cn-north-1.myhuaweicloud.com/HuangHai/Backup/ShiTi.jpg"
        outcome = ShiTiRecognizer().recognize_question(demo_url)
        print("识别结果:")
        print(json.dumps(outcome, indent=2, ensure_ascii=False))
        payload = outcome["Data"] if "Data" in outcome else None
        if not payload:
            print("\n响应中没有有效的试题数据")
        else:
            # Only payloads exposing ``get`` can provide a "content" entry;
            # anything else is treated as having no recognized content.
            if isinstance(payload, dict) or hasattr(payload, "get"):
                text = payload.get("content", "")
            else:
                text = ""
            if text:
                print(f"\n识别的试题内容: {text}")
            else:
                print("\n未识别到试题内容")
    except Exception as exc:
        logger.error(f"程序执行失败: {str(exc)}")
        print(f"错误: {str(exc)}")