Files
dsProject/dsLightRag/Util/OcrUtil.py
2025-08-14 15:45:08 +08:00

91 lines
3.4 KiB
Python

# pip install alibabacloud_ocr_api20210707==3.1.3
# pip install alibabacloud_credentials
import json
from typing import Optional

from alibabacloud_credentials.client import Client as CredentialClient
from alibabacloud_credentials.models import Config as CredentialConfig
from alibabacloud_ocr_api20210707 import models as ocr_api_20210707_models
from alibabacloud_ocr_api20210707.client import Client as ocr_api20210707Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient

from Util import LlmUtil
from Config.Config import ALY_AK, ALY_SK, ALY_OSS_PREFIX, ALY_BUCKET, ALY_ENDPOINT
from Util import OssUtil
def create_client() -> ocr_api20210707Client:
    """Build an OCR API client authenticated with the configured access key.

    The credential is created from the module-level ``ALY_AK`` / ``ALY_SK``
    values using the ``access_key`` credential type, and the client is
    pointed at the ``ocr-api.cn-hangzhou.aliyuncs.com`` endpoint.

    Returns:
        A new ``ocr_api20210707Client`` instance.
    """
    access_key_credential = CredentialClient(
        config=CredentialConfig(
            type='access_key',
            access_key_id=ALY_AK,
            access_key_secret=ALY_SK,
        )
    )
    api_config = open_api_models.Config(credential=access_key_credential)
    # The endpoint is assigned after construction, as a plain attribute.
    api_config.endpoint = 'ocr-api.cn-hangzhou.aliyuncs.com'
    return ocr_api20210707Client(api_config)
def recognize_handwriting(image_url: str) -> Optional[str]:
    """Recognize handwritten text in an image through the OCR API.

    Progress messages and diagnostics are printed to stdout. Exceptions
    raised by the API call are caught and printed instead of propagating,
    in which case the function returns ``None``.

    Args:
        image_url: URL of the image to send to the recognition API.

    Returns:
        The value stored under ``content`` in the JSON document carried by
        the response body's ``data`` attribute (presumably the recognized
        text -- confirm against the API documentation), or ``None`` when
        the call failed, the response had no parseable ``data``, or the
        parsed payload has no ``content`` key.
    """
    client = create_client()
    request = ocr_api_20210707_models.RecognizeHandwritingRequest()
    runtime = util_models.RuntimeOptions()
    # Explicitly set the Content-Type header.
    runtime.headers = {'Content-Type': 'application/json'}

    try:
        print(f"正在使用图片URL: {image_url}")
        # Attach the image URL to the request.
        request.url = image_url
    except Exception as e:
        print(f"设置图片URL时发生错误: {str(e)}")
        return None

    # Stays None unless the response's data field is parsed successfully, so
    # the final return never touches an unbound name on a failure path.
    data = None
    try:
        # Call the API and print what came back.
        print("正在调用API...")
        response = client.recognize_handwriting_with_options(request, runtime)
        print("识别结果:")
        if hasattr(response, 'body'):
            body = response.body
            # Extract and parse the JSON-encoded data field.
            if hasattr(body, 'data'):
                try:
                    data = json.loads(body.data)
                except json.JSONDecodeError as e:
                    print(f"解析data字段失败: {str(e)}")
                except Exception as e:
                    print(f"处理data字段时发生错误: {str(e)}")
            if hasattr(body, 'text'):
                print(f"文本内容: {body.text}")
            if hasattr(body, 'confidence'):
                print(f"置信度: {body.confidence}")
        else:
            print(f"响应不包含body属性: {response}")
    except Exception as error:
        # Report the failure; prefer the exception's own message attribute.
        print(f"错误信息: {error.message if hasattr(error, 'message') else str(error)}")
        # Print the diagnostic recommendation when the error carries one.
        if hasattr(error, 'data') and error.data is not None and 'Recommend' in error.data:
            print(f"建议: {error.data.get('Recommend')}")
        else:
            print("未找到诊断建议")
        # Print the full error text to aid debugging.
        print(f"完整错误: {str(error)}")
        UtilClient.assert_as_string(str(error))

    # Only a parsed JSON object can carry the 'content' key.
    if isinstance(data, dict):
        return data.get('content')
    return None